import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import joblib
import os

# Train on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class CustomDataset(Dataset):
    """Wrap feature/label arrays as a torch ``Dataset``.

    When ``labels`` is None (inference-time use), ``__getitem__`` returns
    only the feature tensor; otherwise it returns ``(features, label)``.
    """

    def __init__(self, features, labels=None):
        # np.asarray accepts lists, ndarrays, and pandas objects alike;
        # torch.tensor on a pandas Series directly is a fragile/legacy path.
        self.X = torch.tensor(np.asarray(features), dtype=torch.float32)
        if labels is not None:
            self.y = torch.tensor(np.asarray(labels), dtype=torch.long)
        else:
            self.y = None

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if self.y is not None:
            return self.X[idx], self.y[idx]
        return self.X[idx]


class NeuralNet(nn.Module):
    """Two-layer MLP classifier: Linear -> ReLU -> Linear (raw logits)."""

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax itself.
        out = self.fc2(out)
        return out


def main():
    """Train the MLP on the CSV training data and persist model + preprocessors.

    Side effects: creates ``models/`` and ``preprocess/`` directories, writes
    the fitted LabelEncoder, StandardScaler, and the final model state dict.
    """
    train_csv_path = 'data/train_data.csv'
    model_save_path = 'models/model.pth'
    scaler_save_path = 'preprocess/scaler.joblib'
    encoder_save_path = 'preprocess/encoder.joblib'
    os.makedirs('models', exist_ok=True)
    os.makedirs('preprocess', exist_ok=True)

    data = pd.read_csv(train_csv_path)
    feature_columns = ['DATA', 'time', 'Globle_real', 'globle_act', 'Voltage',
                       'global_in', 'sub_mentor1', 'sub_mentor2', 'sub_mentor3']
    # .copy() is required: without it the 'DATA' assignment below mutates a
    # view of `data`, raising SettingWithCopyWarning and silently failing
    # under pandas copy-on-write.
    X = data[feature_columns].copy()
    y = data['label']

    # Encode the categorical 'DATA' column and persist the encoder so that
    # inference can apply the identical mapping.
    encoder = LabelEncoder()
    X['DATA'] = encoder.fit_transform(X['DATA'])
    joblib.dump(encoder, encoder_save_path)

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    joblib.dump(scaler, scaler_save_path)

    # Labels in the CSV are 1-based; CrossEntropyLoss expects 0-based class
    # ids, so shift down by one. Convert to ndarray so downstream tensor
    # construction does not receive a pandas Series.
    y = (y - 1).to_numpy()

    X_train, X_val, y_train, y_val = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42, stratify=y)

    train_dataset = CustomDataset(X_train, y_train)
    val_dataset = CustomDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

    # Hyperparameters.
    input_size = X_train.shape[1]
    hidden_size = 64
    num_classes = int(np.unique(y).size)
    num_epochs = 100
    learning_rate = 0.001

    model = NeuralNet(input_size, hidden_size, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for batch_X, batch_y in train_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Weight the batch loss by batch size so the epoch average is
            # correct even when the last batch is smaller.
            running_loss += loss.item() * batch_X.size(0)
            # torch.max on the tensor itself; the legacy `.data` attribute
            # is unnecessary for an argmax.
            _, predicted = torch.max(outputs, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
        epoch_loss = running_loss / len(train_dataset)
        epoch_acc = correct / total

        # ---- validation pass ----
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for val_X, val_y in val_loader:
                val_X = val_X.to(device)
                val_y = val_y.to(device)
                outputs = model(val_X)
                loss = criterion(outputs, val_y)
                val_loss += loss.item() * val_X.size(0)
                _, predicted = torch.max(outputs, 1)
                val_total += val_y.size(0)
                val_correct += (predicted == val_y).sum().item()
        val_epoch_loss = val_loss / len(val_dataset)
        val_epoch_acc = val_correct / val_total

        # Log the first epoch and then every 10th.
        if (epoch + 1) % 10 == 0 or epoch == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], '
                  f'Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}, '
                  f'Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}')

    # Persist the final (last-epoch) weights. NOTE(review): consider saving
    # the best-validation-accuracy checkpoint instead.
    torch.save(model.state_dict(), model_save_path)
    print(f"模型已保存到 {model_save_path}")


if __name__ == '__main__':
    main()