Video walkthrough: https://www.yuque.com/chudi/tzqav9/ny150b#aalY8
import tensorflow as tf
from tensorflow import keras
from utils import *
EPOCH = 10
BATCH_SIZE = 32
VEC_DIM = 10
DNN_LAYERS = [64, 128, 64]
DROPOUT_RATE = 0.5
base, test = loadData()
# Total number of category values summed over all features (input width minus the label column)
FEAT_CATE_NUM = base.shape[1] - 1
K = tf.keras.backend
def run():
    # Every categorical value of every feature is mapped into one global id space.
    # Each row of x holds the ids of that sample's feature values, one per field.
    val_x, val_y = getAllData(test)
    train_x, train_y = getAllData(base)
    cate_num = val_x[0].shape[0]  # number of feature fields per sample
    sub_emb_arr = []
    product_list = []

    inputs = keras.Input((cate_num,))
    emb = keras.layers.Embedding(FEAT_CATE_NUM, VEC_DIM, input_length=cate_num)(inputs)
    # Linear part: the flattened concatenation of all field embeddings
    liner = keras.layers.Flatten()(emb)
    # Split emb into one small embedding per field
    split_arr = tf.split(emb, cate_num, 1)
    for split in split_arr:
        sub_emb_arr.append(keras.layers.Flatten()(split))
    # Pairwise inner products between the field embeddings
    for i in range(0, len(sub_emb_arr)):
        for j in range(i + 1, len(sub_emb_arr)):
            product_list.append(keras.layers.Dot(axes=1)([sub_emb_arr[i], sub_emb_arr[j]]))

    # Feed the products together with the linear part into the DNN
    dense = keras.layers.concatenate(product_list + [liner])
    dense = keras.layers.Dropout(DROPOUT_RATE)(dense)
    for units in DNN_LAYERS:
        dense = keras.layers.Dense(units, activation='relu')(dense)
        dense = keras.layers.Dropout(DROPOUT_RATE)(dense)
    outputs = keras.layers.Dense(1, activation='sigmoid')(dense)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy',
                  optimizer=keras.optimizers.Adam(0.001),
                  metrics=[keras.metrics.AUC()])
    tbCallBack = keras.callbacks.TensorBoard(log_dir='./logs',
                                             histogram_freq=0,
                                             write_graph=True,
                                             write_images=True,
                                             embeddings_freq=0)
    model.fit(train_x, train_y, batch_size=BATCH_SIZE, epochs=EPOCH, verbose=2,
              validation_data=(val_x, val_y),
              callbacks=[tbCallBack])
run()
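
The script imports loadData() and getAllData() from utils, which are not shown in this section. Below is a minimal, hypothetical sketch of what they are assumed to provide, inferred only from how run() uses them: the data is read as a table whose columns are the one-hot/ID-encoded feature values plus a 0/1 label in the last column, and getAllData() turns each row into the integer ids of its active feature values. The file paths and column layout here are assumptions, not the tutorial's actual utils implementation.

# Hypothetical stand-in for utils.py, only to document the expected data contract.
import numpy as np
import pandas as pd

def loadData():
    # Placeholder paths; the real tutorial ships its own data files.
    base = pd.read_csv('data/base.csv')
    test = pd.read_csv('data/test.csv')
    return base, test

def getAllData(df):
    # Assumption: all columns except the last are one-hot indicators, label is last.
    feats = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values.astype('float32')
    # Each field is assumed to activate exactly one indicator per row, so the
    # indices of the non-zero columns give one global id per field.
    x = np.stack([np.nonzero(row)[0] for row in feats]).astype('int32')
    return x, y

Whatever the real helpers do, the model only needs x of shape (n_samples, n_fields) holding integer ids in [0, FEAT_CATE_NUM) and y of shape (n_samples,) holding binary labels; with that contract, cate_num * (cate_num - 1) / 2 pairwise inner products are built and concatenated with the linear part before the DNN.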