基于随机森林算法的化合物二分类机器学习模型
----
代码示例
#导入依赖包
import pandas as pd
import numpy as np
from rdkit import Chem,...Fingerprinter.FingerprintMol(mol)[0]
return np.append(arr,ds)
# Load the data: read 'mol_IC50.csv', keeping only the columns at
# positions 0, 1 and 4 (usecols is given as integer indices here).
# NOTE(review): the meaning of those three columns is not visible in this
# excerpt -- confirm against the CSV header.
df=pd.read_csv('mol_IC50.csv',usecols=[0,1,4])
#随机打乱数据...pIC50']>6, 1, 0)
# Convert the descriptors and the activity labels to arrays.
# X: every entry of the 'Descriptors' column, gathered into a Python list
# and handed to np.array.
# NOTE(review): presumably each entry is an equal-length numeric vector so
# that X comes out 2-D -- the code that fills 'Descriptors' is not visible
# in this excerpt; confirm.
X = np.array(list(df['Descriptors']))
# y: the values of the 'Active' column (apparently a 0/1 label -- TODO confirm).
y = df['Active'].values
# 划分训练集和测试集
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25)
# 创建随机森林模型并拟合数据