{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as mpatches\n", "import gc\n", "import time\n", "import subprocess\n", "import logging\n", "from concurrent.futures import ProcessPoolExecutor, as_completed" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from rdkit import Chem\n", "from rdkit.Chem import AllChem, DataStructs, Draw\n", "from rdkit import RDConfig\n", "from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges\n", "from rdkit.Chem.AllChem import GetMorganGenerator\n", "from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray\n", "from rdkit.Avalon.pyAvalonTools import GetAvalonFP" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-11-04 22:59:19.830835: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-11-04 22:59:19.845573: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-11-04 22:59:19.849643: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-11-04 22:59:19.860597: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate 
compiler flags.\n", "2024-11-04 22:59:21.010480: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], "source": [ "import tensorflow as tf\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Dense, Dropout, Activation\n", "from tensorflow.keras.regularizers import l2\n", "from tensorflow.keras.optimizers import Adam\n", "from tensorflow.keras import regularizers" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import Ridge\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.neural_network import MLPRegressor\n", "from sklearn.svm import SVR\n", "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import optuna\n", "from optuna.trial import TrialState\n", "from optuna.integration import TFKerasPruningCallback" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from extra_code.feature_selection import selection_data_descriptor_compress, selection_fromStudy_compress" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730728761.540219 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n", "Your kernel may have been built without NUMA support.\n", "I0000 00:00:1730728761.611323 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n", "Your kernel may have been 
built without NUMA support.\n", "I0000 00:00:1730728761.611412 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n", "Your kernel may have been built without NUMA support.\n" ] } ], "source": [ "tf.keras.backend.clear_session()\n", "gpus = tf.config.experimental.list_physical_devices('GPU')\n", "if gpus:\n", " try:\n", " for gpu in gpus:\n", " tf.config.experimental.set_memory_growth(gpu, True)\n", " except RuntimeError as e:\n", " print(e)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "target_path = \"result/6_ANO_network_[fea_struc]\"\n", "os.makedirs(target_path, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "data_ws = pd.read_csv('./data/ws496_logS.csv', dtype={'SMILES': 'string'})\n", "smiles_ws = data_ws['SMILES']\n", "y_ws = data_ws.iloc[:, 2]\n", "\n", "data_delaney = pd.read_csv('./data/delaney-processed.csv', dtype={'smiles': 'string'})\n", "smiles_de = data_delaney['smiles']\n", "y_de = data_delaney.iloc[:, 1]\n", "\n", "data_lovric2020 = pd.read_csv('./data/Lovric2020_logS0.csv', dtype={'isomeric_smiles': 'string'})\n", "smiles_lo = data_lovric2020['isomeric_smiles']\n", "y_lo = data_lovric2020.iloc[:, 1]\n", "\n", "data_huuskonen = pd.read_csv('./data/huusk.csv', dtype={'SMILES': 'string'})\n", "smiles_hu = data_huuskonen['SMILES']\n", "y_hu = data_huuskonen.iloc[:, -1].astype('float')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def mol3d(mol):\n", " mol = Chem.AddHs(mol)\n", " optimization_methods = [\n", " (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),\n", " (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),\n", " (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200})\n", " ]\n", "\n", " for method, args, kwargs in optimization_methods:\n", " try:\n", " method(*args, **kwargs)\n", " if mol.GetNumConformers() > 
0:\n", " return mol\n", " except ValueError as e:\n", " print(f\"Error: {e} - Trying next optimization method [{method}]\")\n", "\n", " print(f\"Invalid mol for 3d {'\\033[94m'}{Chem.MolToSmiles(mol)}{'\\033[0m'} - No conformer generated\")\n", " return None" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def convert_smiles_to_mol(smiles, fail_folder=None, index=None, yvalue=None):\n", " mol = Chem.MolFromSmiles(smiles)\n", " if mol is None:\n", " print(f\"[convert_smiles_to_mol] Cannot convert {smiles} to Mols\")\n", " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": \"Invalid SMILES\"}\n", "\n", " try:\n", " Chem.Kekulize(mol, clearAromaticFlags=True)\n", " isomeric_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)\n", " mol = Chem.MolFromSmiles(isomeric_smiles)\n", " except Exception as e:\n", " print(f\"[convert_smiles_to_mol] failed {smiles} isomeric_smiles by {e}\")\n", " if fail_folder and index is not None:\n", " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n", " img = Draw.MolToImage(mol)\n", " img.save(img_path)\n", " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"Isomeric SMILES error: {e}\"}\n", "\n", " try:\n", " Chem.SanitizeMol(mol)\n", " except Exception as e:\n", " print(f\"[convert_smiles_to_mol] failed {smiles} SanitizeMol by {e}\")\n", " if fail_folder and index is not None:\n", " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n", " img = Draw.MolToImage(mol)\n", " img.save(img_path)\n", " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"SanitizeMol error: {e}\"}\n", "\n", " return mol, None" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def process_smiles(smiles, yvalue, fail_folder, index):\n", " mol, error = convert_smiles_to_mol(smiles, fail_folder, index, yvalue)\n", " if error:\n", " return None, None, error\n", "\n", " mol_3d = mol3d(mol)\n", " if 
mol_3d:\n", " return smiles, yvalue, None\n", " else:\n", " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n", " img = Draw.MolToImage(mol)\n", " img.save(img_path)\n", " return None, None, {\"smiles\": smiles, \"y_value\": yvalue}\n", "\n", "def process_dataset(smiles_list, y_values, dataset_name, target_path=\"result\", max_workers=None):\n", " start = time.time()\n", " valid_smiles, valid_y = [], []\n", " error_smiles_list = []\n", " fail_folder = f\"{target_path}/failed/{dataset_name}\"\n", " os.makedirs(fail_folder, exist_ok=True)\n", "\n", " with ProcessPoolExecutor(max_workers=max_workers) as executor:\n", " futures = [\n", " executor.submit(process_smiles, smiles, yvalue, fail_folder, i)\n", " for i, (smiles, yvalue) in enumerate(zip(smiles_list, y_values))\n", " ]\n", " for future in as_completed(futures):\n", " smiles, yvalue, error = future.result()\n", " if error:\n", " error_smiles_list.append(error)\n", " elif smiles is not None and yvalue is not None:\n", " valid_smiles.append(smiles)\n", " valid_y.append(yvalue)\n", "\n", " if error_smiles_list:\n", " error_df = pd.DataFrame(error_smiles_list)\n", " error_df.to_csv(os.path.join(fail_folder, \"failed_smiles.csv\"), index=False)\n", " print(f\" [{dataset_name:<10}] : {time.time()-start:.4f} sec\")\n", " return valid_smiles, valid_y" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " [ws496 ] : 0.8667 sec\n", " [delaney ] : 1.4338 sec\n", "Error: Bad Conformer Id - Trying next optimization method []\n", "Error: Bad Conformer Id - Trying next optimization method []\n", "Invalid mol for 3d \u001b[94m[H]O[C@@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n", "Error: Bad Conformer Id - Trying next optimization method []\n", "Error: Bad Conformer Id - Trying next 
# Bit lengths of the three fingerprint families that are concatenated later.
LEN_OF_FF = 2048  # Morgan / ECFP (radius 2)
LEN_OF_MA = 167   # MACCS keys
LEN_OF_AV = 512   # Avalon


def get_fingerprints(mol):
    """Compute the Morgan, MACCS and Avalon fingerprints of one molecule.

    Args:
        mol: RDKit molecule, or ``None``.

    Returns:
        ``(ecfp_array, maccs_bitvect, avalon_array)``. The first and last are
        integer numpy vectors of length ``LEN_OF_FF`` / ``LEN_OF_AV``; MACCS
        is returned as the RDKit bit vector. ``(None, None, None)`` is
        returned for ``mol is None``.
    """
    if mol is None:
        return None, None, None

    ecfp = GetMorganGenerator(radius=2, fpSize=LEN_OF_FF).GetFingerprint(mol)
    ecfp_array = np.zeros((LEN_OF_FF,), dtype=int)
    DataStructs.ConvertToNumpyArray(ecfp, ecfp_array)

    maccs = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)

    # Request the size explicitly so it cannot drift away from LEN_OF_AV.
    avalon_fp = GetAvalonFP(mol, nBits=LEN_OF_AV)
    avalon_array = np.zeros((LEN_OF_AV,), dtype=int)
    DataStructs.ConvertToNumpyArray(avalon_fp, avalon_array)

    return ecfp_array, maccs, avalon_array


def _fingerprint_all(mols, use_parallel):
    """Run ``get_fingerprints`` over ``mols``, in a process pool when requested.

    Falls back to an in-process loop if the pool fails.
    """
    if use_parallel:
        try:
            with ProcessPoolExecutor() as executor:
                return list(executor.map(get_fingerprints, mols))
        except Exception as e:
            print(f"Parallel processing failed due to: {e}. Falling back to sequential processing.")
    return [get_fingerprints(mol) for mol in mols]


def fp_converter(data, use_parallel=True):
    """Convert SMILES strings into three row-aligned fingerprint matrices.

    Every input SMILES gets exactly one row in each matrix, so the matrices
    stay aligned with ``mols`` and with the caller's target vector. A SMILES
    that RDKit cannot parse yields an all-zero row and is reported. The
    previous version dropped such rows from the ECFP and Avalon matrices only,
    which left the three outputs with different row counts.

    Args:
        data: Iterable of SMILES strings.
        use_parallel: Truthy to fingerprint in a process pool. This is the
            second positional parameter; it is not an output path.

    Returns:
        ``(mols, ecfp, maccs, avalon)`` where ``mols`` is the list of parsed
        molecules (``None`` for unparsable input) and the matrices have shapes
        ``(n, LEN_OF_FF)``, ``(n, LEN_OF_MA)`` and ``(n, LEN_OF_AV)``.
    """
    mols = [Chem.MolFromSmiles(smi) for smi in data]
    n_mols = len(mols)

    ECFP_container = np.zeros((n_mols, LEN_OF_FF), dtype=int)
    MACCS_container = np.zeros((n_mols, LEN_OF_MA), dtype=int)
    AvalonFP_container = np.zeros((n_mols, LEN_OF_AV), dtype=int)
    if n_mols == 0:
        # Nothing to fingerprint: return well-shaped empty matrices.
        return mols, ECFP_container, MACCS_container, AvalonFP_container

    failed_rows = []
    for row, (ecfp, maccs, avalon) in enumerate(_fingerprint_all(mols, use_parallel)):
        if ecfp is None:
            failed_rows.append(row)
            continue
        ECFP_container[row] = ecfp
        # Convert into an owned buffer first: ConvertToNumpyArray may resize
        # its destination, which a row view of the matrix cannot do.
        maccs_row = np.zeros((LEN_OF_MA,), dtype=int)
        DataStructs.ConvertToNumpyArray(maccs, maccs_row)
        MACCS_container[row] = maccs_row
        AvalonFP_container[row] = avalon

    if failed_rows:
        print(f"[fp_converter] {len(failed_rows)} unparsable SMILES left as zero rows: {failed_rows}")

    return mols, ECFP_container, MACCS_container, AvalonFP_container


def concatenate_to_numpy(*dataframes):
    """Column-wise concatenate DataFrames and/or numpy arrays into one array.

    Args:
        *dataframes: ``pandas.DataFrame`` or ``numpy.ndarray`` blocks that
            share the same number of rows.

    Returns:
        A single array built with ``np.concatenate(..., axis=1)``.

    Raises:
        ValueError: If any input is neither a DataFrame nor an ndarray.
    """
    blocks = []
    for block in dataframes:
        if isinstance(block, pd.DataFrame):
            block = block.to_numpy()
        if not isinstance(block, np.ndarray):
            raise ValueError("All inputs must be either pandas DataFrame or numpy array")
        blocks.append(block)
    return np.concatenate(blocks, axis=1)
# Bind `storage` unconditionally. Previously a failed connection left the name
# undefined, so every later cell raised NameError, which its own broad
# `except` then hid behind the hard-coded fallback mask.
storage = None
try:
    storage = optuna.storages.RDBStorage(
        url="sqlite:///ano_analysis.db",
        engine_kwargs={"connect_args": {"timeout": 10000}},
    )
    # Alternative backend: "postgresql+psycopg2://postgres:<pwd>@localhost:<port>"
except Exception as e:
    print(f"Error occured: {e}")

# Per-position labels that annotated every hard-coded fallback mask.
# NOTE(review): the masks below have 50 entries ("NumValenceElec" and
# "NumValenceElectrons" both appear), while the mask printed for a loaded study
# has 49 entries and lists the descriptor groups in a different order. Confirm
# against selection_data_descriptor_compress that the fallback is still valid.
FALLBACK_MASK_LABELS = (
    "MolWeight", "Mol_logP", "Mol_MR", "Mol_TPSA", "NumRotatableBonds",
    "HeavyAtomCount", "NumHAcceptors", "NumHDonors", "NumHeteroatoms", "NumValenceElec",
    "NHOHCount", "NOCount", "RingCount", "NumAromaticRings", "NumSaturatedRings",
    "NumAliphaticRings", "LabuteASA", "NumValenceElectrons", "BalabanJ", "BertzCT",
    "Ipc", "kappa_Series[1-3]_ind", "Chi_Series[13]_ind", "Phi", "HallKierAlpha",
    "NumAmideBonds", "FractionCSP3", "NumSpiroAtoms", "NumBridgeheadAtoms", "PEOE_VSA_Series[1-14]_ind",
    "SMR_VSA_Series[1-10]_ind", "SlogP_VSA_Series[1-12]_ind", "EState_VSA_Series[1-11]_ind", "VSA_EState_Series[1-10]_ind", "Asphericity",
    "PBF", "RadiusOfGyration", "InertialShapeFactor", "Eccentricity", "SpherocityIndex",
    "PMI_series[1-3]_ind", "NPR_series[1-2]_ind", "MQNs", "AUTOCORR2D", "BCUT2D",
    "AUTOCORR3D", "RDF", "MORSE", "WHIM", "GETAWAY",
)

# Hard-coded 0/1 descriptor masks used when a study cannot be loaded.
# Each row of ten values corresponds to ten consecutive labels above.
FALLBACK_FEATURE_MASKS = {
    "ws": [
        1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
        1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
        0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
        1, 0, 1, 0, 0, 1, 0, 0, 1, 0,
        1, 0, 0, 0, 1, 0, 1, 0, 1, 0,
    ],
    "de": [
        1, 1, 1, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 0, 0, 0, 1, 1, 0, 1, 1,
        1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
        1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 1, 0, 1, 1, 0, 1, 0, 1, 0,
    ],
    "lo": [
        1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
        1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
        0, 1, 0, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
    ],
    "hu": [
        1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 0, 0, 1, 1,
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
    ],
}


def load_feature_mask(tag, storage, fallback_masks=FALLBACK_FEATURE_MASKS):
    """Return the descriptor-selection mask for the dataset ``tag``.

    The mask is read from the study named ``ANO_<tag>_feature`` through
    ``selection_fromStudy_compress``. If no storage is available, or the
    lookup raises, the hard-coded mask for ``tag`` is returned and the
    substitution is reported explicitly.

    Args:
        tag: Dataset key, one of the keys of ``fallback_masks``.
        storage: Optuna storage object, or ``None`` when it could not be opened.
        fallback_masks: Mapping from tag to the hard-coded 0/1 list.

    Returns:
        Whatever ``selection_fromStudy_compress`` returns, or a fresh copy of
        the fallback list.
    """
    study_name = f"ANO_{tag}_feature"
    if storage is None:
        print(f"Optuna storage unavailable - using hard-coded fallback mask for '{study_name}'")
        return list(fallback_masks[tag])
    try:
        return selection_fromStudy_compress(study_name, storage)
    except Exception as e:
        print(f"Error occured: {e}")
        print(f"Using hard-coded fallback mask for '{study_name}'")
        # Copy so callers can never mutate the shared constant.
        return list(fallback_masks[tag])


ws_fea = load_feature_mask("ws", storage)
de_fea = load_feature_mask("de", storage)
lo_fea = load_feature_mask("lo", storage)
hu_fea = load_feature_mask("hu", storage)
def suppress_warnings(condition=True):
    """Toggle TensorFlow log verbosity.

    Sets the level of the Python ``'tensorflow'`` logger and the
    ``TF_CPP_MIN_LOG_LEVEL`` environment variable together.

    Args:
        condition: True selects ``logging.ERROR`` and ``'3'``; False selects
            ``logging.WARNING`` and ``'0'``.
    """
    python_level, cpp_level = (logging.ERROR, '3') if condition else (logging.WARNING, '0')
    logging.getLogger('tensorflow').setLevel(python_level)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = cpp_level
def search_model(trial, input_dim):
    """Build an un-compiled Keras MLP whose shape is sampled from ``trial``.

    Sampled parameters, in this order: ``n_layers`` (1-3) and
    ``layer_dropout`` (0/1); then for each hidden layer ``n_units_l_<i>``
    (2-9999), ``n_decay_l_<i>`` (L2 factor) and, when ``layer_dropout == 1``,
    ``F_dropout_<i>``; finally ``last_dropout`` when ``layer_dropout == 0``.
    Each hidden block is Dense (glorot_uniform, L2) followed by
    LeakyReLU(0.01). The output layer is a single linear unit.

    Args:
        trial: Optuna trial (or any object exposing ``suggest_int`` and
            ``suggest_categorical``).
        input_dim: Number of input features.

    Returns:
        A ``tf.keras.Sequential`` model. It is not compiled here.
    """
    keras_layers = tf.keras.layers

    n_layers = trial.suggest_int("n_layers", 1, 3)
    model = tf.keras.Sequential()
    model.add(keras_layers.Input(shape=(input_dim,)))
    per_layer_dropout = trial.suggest_int("layer_dropout", 0, 1) == 1

    for i in range(n_layers):
        num_hidden = trial.suggest_int(f"n_units_l_{i}", 2, 9999)
        num_decay = trial.suggest_categorical(f"n_decay_l_{i}", [1e-4, 1e-5, 1e-6])
        model.add(
            keras_layers.Dense(
                num_hidden,
                kernel_initializer='glorot_uniform',
                kernel_regularizer=tf.keras.regularizers.l2(num_decay),
            )
        )
        model.add(keras_layers.LeakyReLU(alpha=0.01))
        if per_layer_dropout:
            rate = trial.suggest_categorical(f"F_dropout_{i}", [0.1, 0.2, 0.3])
            model.add(keras_layers.Dropout(rate=rate))

    if not per_layer_dropout:
        # One dropout layer after the whole stack instead of one per block.
        rate = trial.suggest_categorical("last_dropout", [0.1, 0.2, 0.3])
        model.add(keras_layers.Dropout(rate=rate))

    model.add(keras_layers.Dense(units=1))
    return model


def save_model(trial, x_data, model_path="save_model/full_model.keras"):
    """Build the model for ``trial`` and write it to ``model_path``.

    An existing file is always removed first, so a failed build can never
    leave a stale architecture from an earlier trial on disk. The earlier
    implementation did this by deleting the file and calling itself again;
    this version does it in one straight-line pass.

    Args:
        trial: Optuna trial forwarded to ``search_model``.
        x_data: Feature matrix; only ``x_data.shape[1]`` is used.
        model_path: Destination file. Defaults to the previous hard-coded
            location.

    Returns:
        True if the model was written, False if building or saving failed.
        The error is printed, not raised, as before.
    """
    if os.path.exists(model_path):
        print(f"Model already exists at {model_path}")
        os.remove(model_path)

    try:
        model = search_model(trial, x_data.shape[1])
        os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)
        model.save(model_path)
    except Exception as e:
        print(f"Error saving model: {e}")
        return False

    print(f"Model successfully saved to {model_path}")
    return True
train_test_split(new_hu, y_hu, test_size = 0.1, random_state = 42)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# # Colab\n", "# def preprocess_data(xtr, ytr):\n", "# dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr))\n", "# dataset = dataset.shuffle(buffer_size=len(xtr)).batch(BATCHSIZE).cache().prefetch(tf.data.AUTOTUNE)\n", "# return dataset\n", "\n", "# cb = tf.keras.callbacks.EarlyStopping(\n", "# monitor='loss', \n", "# patience=5,\n", "# restore_best_weights=True,\n", "# # min_delta=0.001,\n", "# mode='min',\n", "# verbose=1\n", "# )" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# # Colab\n", "# def objective_ws_network(trial):\n", "# tf.keras.backend.clear_session()\n", "# model = search_model(trial, xtr_fws.shape[1])\n", "# train_data = preprocess_data(xtr_fws, ytr_fws)\n", "# model.fit(\n", "# train_data,\n", "# batch_size=BATCHSIZE,\n", "# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n", "# epochs=EPOCHS,\n", "# verbose=0,\n", "# )\n", "# y_pred_search = model.predict(xte_fws, verbose=0)\n", "# score = r2_score(yte_fws, y_pred_search)\n", "# del model\n", "# tf.keras.backend.clear_session()\n", "# gc.collect()\n", "# return score" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "# # Colab\n", "# def objective_de_network(trial):\n", "# tf.keras.backend.clear_session()\n", "# model = search_model(trial, xtr_fde.shape[1])\n", "# train_data = preprocess_data(xtr_fde, ytr_fde)\n", "# model.fit(\n", "# train_data,\n", "# batch_size=BATCHSIZE,\n", "# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n", "# epochs=EPOCHS,\n", "# verbose=0,\n", "# )\n", "# y_pred_search = model.predict(xte_fde, verbose=0)\n", "# score = r2_score(yte_fde, y_pred_search)\n", "# del model\n", "# tf.keras.backend.clear_session()\n", "# gc.collect()\n", "# return score" ] }, { "cell_type": "code", "execution_count": 32, 
# %% Optuna objectives (training is delegated to ./extra_code/learning_process.py)
def _run_network_trial(trial, features, targets):
    """Run one Optuna trial in a child process and return its R2 score.

    Shared implementation behind the four ``objective_*_network`` functions.
    The features/targets are written to ``new_fps.npy`` / ``y_true.npy``, the
    trial's model is written by ``save_model``, and the training script is
    launched with ``BATCHSIZE``, ``EPOCHS``, the sampled learning rate, both
    file names and the trial number as command-line arguments.

    The child's stdout is parsed after it exits:
      * lines starting with ``intermediate_value:`` are read as
        ``intermediate_value:<step>:<value>`` and reported to the trial;
      * the last line containing ``R2:`` supplies the score, unless it also
        contains ``(prune)``, which marks the trial as pruned.

    NOTE(review): ``subprocess.run`` blocks until the child exits, so
    ``trial.should_prune()`` is only evaluated after training has finished.
    NOTE(review): the file names are fixed, so trials running concurrently
    would presumably overwrite each other's inputs - confirm before using
    ``n_jobs > 1`` or several workers.

    Parameters
    ----------
    trial : optuna.trial.Trial
    features : array-like
        Feature matrix saved for the child and used to size the model input.
    targets : array-like
        Target values; converted to a float array before saving.

    Returns
    -------
    float
        Parsed R2 score, or ``0.0`` if no score was found or any non-pruning
        error occurred (best effort, so the study keeps running).

    Raises
    ------
    optuna.exceptions.TrialPruned
        If the pruner or the child process requests pruning.
    """
    noise_fragments = (
        "could not open file to read NUMA node",
        "Your kernel may have been built without NUMA support",
    )
    r2_result = None
    current_step = 0
    try:
        y_true = np.asarray(targets).astype('float')
        np.save('new_fps.npy', features)
        np.save('y_true.npy', y_true)

        save_model(trial, features)

        lr = trial.suggest_categorical("lr", [0.001, 0.0001, 0.00001])

        # Use the kernel's own interpreter so the child sees the same
        # environment; fall back to PATH lookup if it is unavailable.
        result = subprocess.run(
            [sys.executable or 'python3', './extra_code/learning_process.py',
             str(BATCHSIZE), str(EPOCHS),
             str(lr),
             'new_fps.npy', 'y_true.npy', str(trial.number)],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        if result.stderr:
            filtered_stderr = '\n'.join(
                line for line in result.stderr.split('\n')
                if not any(fragment in line for fragment in noise_fragments))
            if filtered_stderr:
                # stderr also carries ordinary framework logging, so only call
                # it an error when the child actually failed.
                label = "Error in subprocess" if result.returncode != 0 else "Subprocess stderr"
                print(f"{label}: {filtered_stderr}", file=sys.stderr)
        if result.returncode != 0:
            print(f"learning_process.py exited with status {result.returncode}",
                  file=sys.stderr)

        lines = result.stdout.splitlines()
        for line in lines:
            if line.startswith("intermediate_value:"):
                _, step, value = line.split(":")
                step = int(step)
                value = float(value)
                current_step = step

                trial.report(value, step)

                if trial.should_prune():
                    raise optuna.exceptions.TrialPruned()

        # Scan from the end so the last reported score wins.
        for line in reversed(lines):
            if "R2:" in line:
                if "(prune)" in line:
                    raise optuna.exceptions.TrialPruned()
                r2_result = float(line.split(":")[1].strip())
                break

    except optuna.exceptions.TrialPruned:
        print(f"Trial pruned at step {current_step}")
        raise
    except Exception as e:
        print(f"Exception occurred: {e}", file=sys.stderr)
        r2_result = 0.0

    gc.collect()
    return r2_result if r2_result is not None else 0.0


def objective_ws_network(trial):
    """Optuna objective for the ``ws`` dataset (``new_ws`` / ``y_ws``)."""
    return _run_network_trial(trial, new_ws, y_ws)


def objective_de_network(trial):
    """Optuna objective for the ``de`` dataset (``new_de`` / ``y_de``)."""
    return _run_network_trial(trial, new_de, y_de)


def objective_lo_network(trial):
    """Optuna objective for the ``lo`` dataset (``new_lo`` / ``y_lo``)."""
    return _run_network_trial(trial, new_lo, y_lo)


def objective_hu_network(trial):
    """Optuna objective for the ``hu`` dataset (``new_hu`` / ``y_hu``)."""
    return _run_network_trial(trial, new_hu, y_hu)


# %% Optuna storage (SQLite file next to the notebook)
storage = optuna.storages.RDBStorage(url="sqlite:///ano_analysis.db", engine_kwargs={"connect_args": {"timeout": 10000}})
# storage_urls = "postgresql+psycopg2://postgres:{pwd}}@localhost:{num}}"
# storage = optuna.storages.RDBStorage(url=storage_urls)

# %% Drop selected studies so their search restarts from scratch
try:
    # optuna.delete_study(study_name="ANO_ws_network", storage=storage)
    # optuna.delete_study(study_name="ANO_de_network", storage=storage)
    optuna.delete_study(study_name="ANO_lo_network", storage=storage)
    # optuna.delete_study(study_name="ANO_hu_network", storage=storage)
except KeyError:
    # The study does not exist yet, so there is nothing to delete. Other
    # errors (e.g. storage problems) are deliberately left to surface.
    pass

# %%
TRIALS=1
"outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-11-04 23:06:11,544] Using an existing study with name 'ANO_de_network' instead of creating a new one.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729176.331625 2499072 service.cc:146] XLA service 0x55701a22e460 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729176.331663 2499072 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729176.465814 2499072 service.cc:146] XLA service 0x55701a205900 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729176.465843 2499072 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729180.613395 2499185 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:09:13,063] Trial 1115 finished with value: 0.965164 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 7490, 'n_decay_l_0': 1e-05, 'n_units_l_1': 2373, 'n_decay_l_1': 1e-06, 'n_units_l_2': 6613, 'n_decay_l_2': 1e-05, 'last_dropout': 0.3, 'lr': 0.001}. 
# %% Hyperparameter search for the "de" network
# Creates the study, or reloads it from `storage` when it already exists, and
# runs TRIALS more trials with a Hyperband pruner.
# Other configurations kept for reference:
#   - study 'ANO_de_network_fixed' with optuna.pruners.SuccessiveHalvingPruner()
#   - HyperbandPruner(min_resource=100, max_resource=1000, reduction_factor=3)
#   - HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10)
study_de_network = optuna.create_study(
    study_name='ANO_de_network',
    storage=storage,
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(
        min_resource=50,
        max_resource=EPOCHS,
        reduction_factor=3,
    ),
    load_if_exists=True,
)
study_de_network.optimize(objective_de_network, n_trials=TRIALS)

# Trial lists by final state. The variable spelling is kept unchanged because
# other cells may reference these names.
pruned_trials_de_newtork = study_de_network.get_trials(
    states=[TrialState.PRUNED], deepcopy=False
)
complete_trials_de_newtork = study_de_network.get_trials(
    states=[TrialState.COMPLETE], deepcopy=False
)
# Run-time notes left by the author:
#   74m 22.0s
#   386m 42.2 - 100 trial 1000 epochs
#   278m 23.3s
# %% Hyperparameter search for the "ws" network
# Creates the study, or reloads it from `storage` when it already exists, and
# runs TRIALS more trials with a Hyperband pruner.
# Other configurations kept for reference:
#   - study 'ANO_ws_network_fixed' with optuna.pruners.SuccessiveHalvingPruner()
#   - HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10)
#   - HyperbandPruner()
study_ws_network = optuna.create_study(
    study_name='ANO_ws_network',
    storage=storage,
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(
        min_resource=50,
        max_resource=EPOCHS,
        reduction_factor=3,
    ),
    load_if_exists=True,
)
study_ws_network.optimize(objective_ws_network, n_trials=TRIALS)

# Trial lists by final state. The variable spelling is kept unchanged because
# the statistics cell further down reads these names.
pruned_trials_ws_newtork = study_ws_network.get_trials(
    states=[TrialState.PRUNED], deepcopy=False
)
complete_trials_ws_newtork = study_ws_network.get_trials(
    states=[TrialState.COMPLETE], deepcopy=False
)
# Run-time notes left by the author:
#   108m 38.1s
#   160m 18.2 - 100 trial 1000 epochs

# %% Trial budget for the next search cell
TRIALS=10
Best is trial 0 with value: 0.723669.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729388.955076 2513776 service.cc:146] XLA service 0x5615ce07e010 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729388.955114 2513776 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729389.098245 2513776 service.cc:146] XLA service 0x5615ce09f3b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729389.098279 2513776 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729391.952147 2513881 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:10:24,537] Trial 1 finished with value: 0.780745 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 7693, 'n_decay_l_0': 0.0001, 'last_dropout': 0.1, 'lr': 0.0001}. 
Best is trial 1 with value: 0.780745.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729427.740828 2518105 service.cc:146] XLA service 0x563fb10b11b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729427.740872 2518105 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729427.891154 2518105 service.cc:146] XLA service 0x563fb0f93760 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729427.891196 2518105 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729428.275905 2518208 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:10:56,189] Trial 2 finished with value: 0.861173 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 3091, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'lr': 0.001}. 
Best is trial 2 with value: 0.861173.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729457.830007 2524433 service.cc:146] XLA service 0x56115c187470 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729457.830062 2524433 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729457.978097 2524433 service.cc:146] XLA service 0x56115c0a57a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729457.978151 2524433 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729463.165301 2524546 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:12:29,474] Trial 3 pruned. \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Trial pruned at step 50\n", "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729550.554875 2527992 service.cc:146] XLA service 0x559ab4767160 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", "I0000 00:00:1730729550.554932 2527992 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729550.699674 2527992 service.cc:146] XLA service 0x559ab47a71a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729550.699710 2527992 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729554.340454 2528101 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:13:00,476] Trial 4 pruned. \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Trial pruned at step 50\n", "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729581.148610 2530567 service.cc:146] XLA service 0x5603394ef990 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729581.148655 2530567 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729581.280871 2530567 service.cc:146] XLA service 0x5603394f7710 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729581.280904 2530567 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729584.114676 2530679 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:13:19,868] Trial 5 pruned. 
\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Trial pruned at step 50\n", "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729603.760434 2536173 service.cc:146] XLA service 0x55b2351fb2f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729603.760485 2536173 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729603.892722 2536173 service.cc:146] XLA service 0x55b232ce1e60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729603.892765 2536173 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729609.099171 2536280 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:14:49,961] Trial 6 finished with value: 0.761589 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 8234, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.3, 'n_units_l_1': 5907, 'n_decay_l_1': 1e-05, 'F_dropout_1': 0.1, 'n_units_l_2': 5363, 'n_decay_l_2': 1e-05, 'F_dropout_2': 0.2, 'lr': 0.0001}. 
Best is trial 2 with value: 0.861173.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729693.449528 2539291 service.cc:146] XLA service 0x562dce6885b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729693.449582 2539291 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729693.592325 2539291 service.cc:146] XLA service 0x562dce5c4c30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729693.592355 2539291 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729697.337810 2539396 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:15:31,025] Trial 7 pruned. \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Trial pruned at step 50\n", "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729732.655927 2542190 service.cc:146] XLA service 0x557b8aaafa90 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", "I0000 00:00:1730729732.655980 2542190 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729732.804313 2542190 service.cc:146] XLA service 0x557b8aa19180 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729732.804347 2542190 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729737.528836 2542300 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:17:30,392] Trial 8 finished with value: 0.849003 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 9926, 'n_decay_l_0': 1e-05, 'n_units_l_1': 6304, 'n_decay_l_1': 1e-06, 'n_units_l_2': 1149, 'n_decay_l_2': 0.0001, 'last_dropout': 0.3, 'lr': 0.0001}. Best is trial 2 with value: 0.861173.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729854.300336 2546634 service.cc:146] XLA service 0x5635047e3fd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729854.300383 2546634 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729854.440994 2546634 service.cc:146] XLA service 0x5635046b3d50 initialized for platform CUDA (this does not guarantee that XLA will be used). 
# %% Hyperparameter search for the "lo" network
# Creates the study, or reloads it from `storage` when it already exists, and
# runs TRIALS more trials with a Hyperband pruner.
# Other configurations kept for reference:
#   - study 'ANO_lo_network_fixed' with optuna.pruners.SuccessiveHalvingPruner()
#   - HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10)
#   - HyperbandPruner()
study_lo_network = optuna.create_study(
    study_name='ANO_lo_network',
    storage=storage,
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(
        min_resource=50,
        max_resource=EPOCHS,
        reduction_factor=3,
    ),
    load_if_exists=True,
)
study_lo_network.optimize(objective_lo_network, n_trials=TRIALS)

# Trial lists by final state. The variable spelling is kept unchanged because
# other cells may reference these names.
pruned_trials_lo_newtork = study_lo_network.get_trials(
    states=[TrialState.PRUNED], deepcopy=False
)
complete_trials_lo_newtork = study_lo_network.get_trials(
    states=[TrialState.COMPLETE], deepcopy=False
)

# %% Trial budget for the next search cell
TRIALS=1
'ANO_hu_network' instead of creating a new one.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Model already exists at save_model/full_model.keras\n", "Model successfully saved to save_model/full_model.keras\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "I0000 00:00:1730729902.168016 2552533 service.cc:146] XLA service 0x55fd29098ab0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729902.168077 2552533 service.cc:154] StreamExecutor device (0): Host, Default Version\n", "I0000 00:00:1730729902.305499 2552533 service.cc:146] XLA service 0x55fd28631810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "I0000 00:00:1730729902.305538 2552533 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n", "I0000 00:00:1730729907.273542 2552637 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", "\n", "[I 2024-11-04 23:19:12,205] Trial 144 finished with value: 0.936649 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 1510, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.1, 'n_units_l_1': 2489, 'n_decay_l_1': 1e-06, 'F_dropout_1': 0.1, 'n_units_l_2': 1567, 'n_decay_l_2': 1e-05, 'F_dropout_2': 0.1, 'lr': 0.0001}. 
Best is trial 130 with value: 0.943809.\n" ] } ], "source": [
"# Open the persisted 'ANO_hu_network' study (load_if_exists=True reuses it when it\n",
"# is already in `storage`) and run TRIALS further trials of objective_hu_network.\n",
"# Pruner configurations that used to sit here as commented-out variants:\n",
"#   * SuccessiveHalvingPruner() under study_name='ANO_hu_network_fixed'\n",
"#   * HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10)\n",
"#   * HyperbandPruner() with its default arguments\n",
"study_hu_network = optuna.create_study(\n",
"    study_name='ANO_hu_network',\n",
"    storage=storage,\n",
"    direction=\"maximize\",\n",
"    pruner=optuna.pruners.HyperbandPruner(\n",
"        min_resource=50,\n",
"        max_resource=EPOCHS,\n",
"        reduction_factor=3,\n",
"    ),\n",
"    load_if_exists=True,\n",
")\n",
"study_hu_network.optimize(objective_hu_network, n_trials=TRIALS)\n",
"\n",
"# Keep the PRUNED and COMPLETE trial lists for the statistics cell further down.\n",
"# The 'newtork' spelling is intentional here: that cell reads these exact names.\n",
"pruned_trials_hu_newtork, complete_trials_hu_newtork = (\n",
"    study_hu_network.get_trials(deepcopy=False, states=[wanted_state])\n",
"    for wanted_state in (TrialState.PRUNED, TrialState.COMPLETE)\n",
")"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"def print_study_statistics(label, study, pruned_trials, complete_trials):\n",
"    \"\"\"Print the trial counts and the best trial of one Optuna study.\n",
"\n",
"    Replaces the report that was previously copy-pasted once per study.\n",
"\n",
"    Args:\n",
"        label: Tag printed inside the square brackets of the header line,\n",
"            e.g. \"ws_structure\".\n",
"        study: Study to report on; its ``trials`` and ``best_trial`` are read.\n",
"        pruned_trials: Collection of the study's PRUNED trials (only its\n",
"            length is printed).\n",
"        complete_trials: Collection of the study's COMPLETE trials (only its\n",
"            length is printed).\n",
"    \"\"\"\n",
"    print(\"Study statistics: [{}] \".format(label))\n",
"    # len(study.trials) counts every trial stored in the study, whatever its\n",
"    # state, so it can be larger than pruned + complete.\n",
"    print(\" Number of finished trials: \", len(study.trials))\n",
"    print(\" Number of pruned trials: \", len(pruned_trials))\n",
"    print(\" Number of complete trials: \", len(complete_trials))\n",
"    print(\"Best trial:\")\n",
"    best_trial = study.best_trial\n",
"    print(\" Value: \", best_trial.value)\n",
"    print(\" Params: \")\n",
"    for key, value in best_trial.params.items():\n",
"        print(\" {}: {}\".format(key, value))"
] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Study statistics: [ws_structure] \n", " Number of finished trials: 194\n", " Number of pruned trials: 3\n", " Number of complete trials: 168\n", "Best trial:\n", " Value: 0.970129\n", " Params: \n", " n_layers: 2\n", " layer_dropout: 0\n", " n_units_l_0: 205\n", " n_decay_l_0: 1e-06\n", " n_units_l_1: 742\n", " n_decay_l_1: 0.0001\n", " last_dropout: 0.1\n", " lr: 0.0001\n" ] } ], "source": [
"# Report for the 'ws' study; the *_newtork names are the ones its optimisation cell defines.\n",
"print_study_statistics(\"ws_structure\", study_ws_network, pruned_trials_ws_newtork, complete_trials_ws_newtork)"
] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Study statistics: [de_structure] \n", " Number of finished trials: 1116\n", " Number of pruned trials: 59\n", " Number of complete trials: 1032\n", "Best trial:\n", " Value: 0.983023\n", " Params: \n", " n_layers: 3\n", " layer_dropout: 0\n", " n_units_l_0: 7946\n", " n_decay_l_0: 1e-05\n", " n_units_l_1: 2662\n", " n_decay_l_1: 1e-06\n", " n_units_l_2: 6499\n", " n_decay_l_2: 1e-05\n", " last_dropout: 0.3\n", " lr: 0.001\n" ] } ], "source": [
"# Report for the 'de' study; the *_newtork names are the ones its optimisation cell defines.\n",
"print_study_statistics(\"de_structure\", study_de_network, pruned_trials_de_newtork, complete_trials_de_newtork)"
] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Study statistics: [lo_structure] \n", " Number of finished trials: 10\n", " Number of pruned trials: 4\n", " Number of complete trials: 6\n", "Best trial:\n", " Value: 0.861173\n", " Params: \n", " n_layers: 1\n", " layer_dropout: 1\n", " n_units_l_0: 3091\n", " n_decay_l_0: 1e-05\n", " F_dropout_0: 0.2\n", " lr: 0.001\n" ] } ], "source": [
"# Report for the 'lo' study; the *_newtork names are the ones its optimisation cell defines.\n",
"print_study_statistics(\"lo_structure\", study_lo_network, pruned_trials_lo_newtork, complete_trials_lo_newtork)"
] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Study statistics: [hu_structure] \n", " Number of finished trials: 145\n", " Number of pruned trials: 55\n", " Number of complete trials: 78\n", "Best trial:\n", " Value: 0.943809\n", " Params: \n", " n_layers: 3\n", " layer_dropout: 1\n", " n_units_l_0: 3891\n", " n_decay_l_0: 0.0001\n", " F_dropout_0: 0.1\n", " n_units_l_1: 7719\n", " n_decay_l_1: 1e-05\n", " F_dropout_1: 0.3\n", " n_units_l_2: 342\n", " n_decay_l_2: 1e-05\n", " F_dropout_2: 0.1\n", " lr: 0.0001\n" ] } ], "source": [
"# Report for the 'hu' study, using the trial lists captured right after its optimisation run.\n",
"print_study_statistics(\"hu_structure\", study_hu_network, pruned_trials_hu_newtork, complete_trials_hu_newtork)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "ai", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython",
"version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }