建立了一个预测沉降的LSTM多输出模型,有4个点的沉降数据,想要构建一个分层或混合模型,其中模型的第一部分学习所有监测点的通用特征,而第二部分根据每个监测点的特定特性进行个性化调整,代码如下:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Input, LSTM, Dense, Dropout
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
# 固定随机种子
def set_seed(seed_value=42):
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)
set_seed(42) # 设置随机种子
# 加载数据
data = pd.read_excel('沉降数据集.xlsx')
data.columns = ['Point', 'Time', 'Settlement']
# 数据归一化
scaler = MinMaxScaler(feature_range=(0, 1))
data['Settlement_scaled'] = scaler.fit_transform(data[['Settlement']])
# 准备数据集函数
def create_dataset(data, point, look_back=1):
point_data = data[data['Point'] == point]
X, Y = [], []
for i in range(len(point_data) - look_back):
a = point_data.iloc[i:(i + look_back)]['Settlement_scaled'].values
X.append(a)
Y.append(point_data.iloc[i + look_back]['Settlement_scaled'])
return np.array(X), np.array(Y)
look_back = 3
# 创建 LSTM 模型
model = Sequential([
LSTM(100, return_sequences=True, input_shape=(look_back, 1)),
Dropout(0.2),
LSTM(50),
Dropout(0.2),
Dense(1)
])
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mean_squared_error', optimizer=optimizer)
# 准备所有点的训练数据和测试数据
all_X_train = []
all_y_train = []
all_X_test = []
all_y_test = []
for point in data['Point'].unique():
X, y = create_dataset(data[data['Point'] == point], point, look_back)
# 使用点数据中的时间标准,80天前为训练集
split_index = data[(data['Point'] == point) & (data['Time'] <= 80)].index[-1] - data[data['Point'] == point].index[0] + 1
X_train, y_train = X[:split_index + 1], y[:split_index + 1]
X_test, y_test = X[split_index + 1:], y[split_index + 1:]
all_X_train.append(X_train)
all_y_train.append(y_train)
all_X_test.append(X_test)
all_y_test.append(y_test)
# 合并所有点的训练数据
X_train = np.concatenate(all_X_train, axis=0)
y_train = np.concatenate(all_y_train, axis=0)
X_train = np.reshape(X_train, (X_train.shape[0], look_back, 1))
# 训练模型
model.fit(X_train, y_train, epochs=200, batch_size=32, verbose=1)
但是出现了模型维度的问题,输入的X_train是一个数组59个样本,输出的Y_train是一个字典14,15,15,15。
ValueError: Data cardinality is ambiguous:
x sizes: 59
y sizes: 14, 15, 15, 15
Make sure all arrays contain the same number of samples.
本来以为是每个监测点的数据不一致导致的,就改成了取最小值,但是还是出现问题:
ValueError: Data cardinality is ambiguous:
x sizes: 56
y sizes: 14, 14, 14, 14
Make sure all arrays contain the same number of samples.
跪求求大佬帮忙解决
相似问题