PHM2012滚动轴承全寿命数据处理数据代码

# PHM2012滚动轴承全寿命数据
# %% 1、工具包
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# %% 2、读取文件
#从文件夹里读取水平信号csv数据
def readfile_h(path):
    files = os.listdir(path) #输出该目录下所有文件名
    #将acc文件挑选出来
    files = list(filter(lambda x: x[0:4]=='acc_' , files)) #过滤（里面是规则）符合规则的留下
    #解决乱序问题，以第5位到倒数第4位之间的数字的大小排序
    files.sort(key=lambda x:int(x[5:-4])) 
    rowdata_v = []
    rowdata_h = []
    for file in files:
        info = path+"/"+file
        #将csv文件里的数据转换成矩阵
        data = np.loadtxt(open(info,"rb"), delimiter=',', skiprows=0)
        rowdata_h = np.hstack((rowdata_h,data[:,4]))
        rowdata_v = np.hstack((rowdata_v,data[:,5]))
    return rowdata_h,rowdata_v
path='D:\\乔彬研究生期间文件\\data\\PHM 2012\\zhenghaiyang-phm-ieee-2012-data-challenge-dataset-master\\phm-ieee-2012-data-challenge-dataset\\'
savePath='D:\\乔彬研究生期间文件\\data\\PHM 2012\\zhenghaiyang-phm-ieee-2012-data-challenge-dataset-master\\中间数据\\'
## 2803个csv文件
train1_1_h,train1_1_v = readfile_h(path+'Learning_set\\Bearing1_1')
# #871个csv文件
train1_2_h,train1_2_v = readfile_h(path+'Learning_set\\Bearing1_2')
test1_3_h,test1_3_v = readfile_h(path+'Full_Test_Set\\Bearing1_3')
test1_5_h,test1_5_v = readfile_h(path+'Full_Test_Set\\Bearing1_5')
test1_6_h,test1_6_v = readfile_h(path+'Full_Test_Set\\Bearing1_6')
test1_7_h,test1_7_v = readfile_h(path+'Full_Test_Set\\Bearing1_7')
#读取1_4的数据
def readfile_h4(path):
    files = os.listdir(path)
    #将acc文件挑选出来
    files = list(filter(lambda x: x[0:4]=='acc_' , files))
    # #解决乱序问题，以第5位到倒数第4位之间的数字的大小排序
    files.sort(key=lambda x:int(x[5:-4]))
    rowdata_v = []
    rowdata_h = []    
    for file in files:
        info = path+"\\"+file
        #将csv文件里的数据转换成矩阵
        data = np.loadtxt(open(info,"rb"), delimiter=';',skiprows=0)
        rowdata_h = np.hstack((rowdata_v,data[:,4]))
        rowdata_v = np.hstack((rowdata_h,data[:,5]))
        
    return rowdata_h,rowdata_v
test1_4_h,test1_4_v = readfile_h4(path+'Full_Test_Set\\Bearing1_4')

# %% 3、绘制图像
# plot horizontal sign
plt.figure(figsize=(20,15))
plt.subplot(4, 2, 1)
plt.plot(train1_1_h)
plt.title('horizontal1')
plt.subplot(4, 2, 2)
plt.plot(train1_2_h)
plt.title('horizontal2')
plt.subplot(4, 2, 3)
plt.plot(test1_3_h)
plt.title('horizontal3')
plt.subplot(4, 2, 4)
plt.plot(test1_4_h)
plt.title('horizontal4')
plt.subplot(4, 2, 5)
plt.plot(test1_5_h)
plt.title('horizontal5')
plt.subplot(4, 2, 6)
plt.plot(test1_6_h)
plt.title('horizontal6')
plt.subplot(4, 2, 7)
plt.plot(test1_7_h)
plt.title('horizontal7')
plt.savefig(savePath+"horizontal.png")
plt.show()

# plot vertical sign
plt.figure(figsize=(20,15))
plt.subplot(4, 2, 1)
plt.plot(train1_1_v)
plt.title('vertical1')
plt.subplot(4, 2, 2)
plt.plot(train1_2_v)
plt.title('vertical2')
plt.subplot(4, 2, 3)
plt.plot(test1_3_v)
plt.title('vertical3')
plt.subplot(4, 2, 4)
plt.plot(test1_4_v)
plt.title('vertical4')
plt.subplot(4, 2, 5)
plt.plot(test1_5_v)
plt.title('vertical5')
plt.subplot(4, 2, 6)
plt.plot(test1_6_v)
plt.title('vertical6')
plt.subplot(4, 2, 7)
plt.plot(test1_7_v)
plt.title('vertical7')
plt.savefig(savePath+"vertical.png")
plt.show()

# %% 4、数据标准化处理
#将数据标准化
mean1_1_h =train1_1_h - np.mean(train1_1_h)
train1_h = mean1_1_h/np.std(train1_1_h)
mean1_1_v =train1_1_v - np.mean(train1_1_v)
train1_v = mean1_1_v/np.std(train1_1_v)

mean1_2_h =train1_2_h - np.mean(train1_2_h)
train2_h =mean1_2_h/ np.std(train1_2_h)
mean1_2_v =train1_2_v - np.mean(train1_2_v)
train2_v =mean1_2_v/ np.std(train1_2_v)

mean1_3_h =test1_3_h - np.mean(test1_3_h)
test3_h = mean1_3_h/np.std(test1_3_h)
mean1_3_v =test1_3_v - np.mean(test1_3_v)
test3_v = mean1_3_v/np.std(test1_3_v)

mean1_4_h =test1_4_h - np.mean(test1_4_h)
test4_h = mean1_4_h/np.std(test1_4_h)
mean1_4_v =test1_4_v - np.mean(test1_4_v)
test4_v = mean1_4_v/np.std(test1_4_v)

mean1_5_h =test1_5_h - np.mean(test1_5_h)
test5_h = mean1_5_h /np.std(test1_5_h)
mean1_5_v =test1_5_v - np.mean(test1_5_v)
test5_v = mean1_5_v /np.std(test1_5_v)

mean1_6_h =test1_6_h - np.mean(test1_6_h)
test6_h = mean1_6_h /np.std(test1_6_h)
mean1_6_v =test1_6_v - np.mean(test1_6_v)
test6_v = mean1_6_v /np.std(test1_6_v)

mean1_7_h=test1_7_h - np.mean(test1_7_h)
test7_h = mean1_7_h /np.std(test1_7_h)
mean1_7_v=test1_7_v - np.mean(test1_7_v)
test7_v = mean1_7_v /np.std(test1_7_v)



xtr1_h=train1_h.reshape(-1,2560,1)
xtr2_h=train2_h.reshape(-1,2560,1)
xte3_h= test3_h.reshape(-1,2560,1)
xte4_h= test4_h.reshape(-1,2560,1)
xte5_h = test5_h.reshape(-1,2560,1)
xte6_h = test6_h.reshape(-1,2560,1)
xte7_h = test7_h.reshape(-1,2560,1)

xtr1_v=train1_v.reshape(-1,2560,1)
xtr2_v=train2_v.reshape(-1,2560,1)
xte3_v= test3_v.reshape(-1,2560,1)
xte4_v= test4_v.reshape(-1,2560,1)
xte5_v = test5_v.reshape(-1,2560,1)
xte6_v = test6_v.reshape(-1,2560,1)
xte7_v = test7_v.reshape(-1,2560,1)


#将垂直信号和水平信号拼接在一起
xtr1 = np.hstack((xtr1_v,xtr1_h))
xtr1 = xtr1.reshape((-1,2560,2))
xtr2 = np.hstack((xtr2_v,xtr2_h))
xtr2 = xtr2.reshape((-1,2560,2))
xte3 = np.hstack((xte3_v,xte3_h))
xte3 = xte3.reshape((-1,2560,2))
xte4 = np.hstack((xte4_v,xte4_h))
xte4 = xte4.reshape((-1,2560,2))
xte5 = np.hstack((xte5_v,xte5_h))
xte5 = xte5.reshape((-1,2560,2))
xte6 = np.hstack((xte6_v,xte6_h))
xte6 = xte6.reshape((-1,2560,2))
xte7 = np.hstack((xte7_v,xte7_h))
xte7= xte7.reshape((-1,2560,2))


# %%5、制作标签
#数据标签，剩余寿命
ytr1 = np.arange(xtr1.shape[0])
ytr1 = ytr1[::-1].reshape((-1,1))

ytr2 = np.arange(xtr2.shape[0])
ytr2 = ytr2[::-1].reshape((-1,1))

yte3 = np.arange(xte3.shape[0])
yte3 = yte3[::-1].reshape((-1,1))

yte4 = np.arange(xte4.shape[0])
yte4 = yte4[::-1].reshape((-1,1))

yte5 = np.arange(xte5.shape[0])
yte5 = yte5[::-1].reshape((-1,1))

yte6 = np.arange(xte6.shape[0])
yte6 = yte6[::-1].reshape((-1,1))

yte7 = np.arange(xte7.shape[0])
yte7 = yte7[::-1].reshape((-1,1))



#将y标签归一化0——1，采用剩余寿命的百分比作为输出标签
#使用剩余寿命的百分比作为标签值
#不乘100为了使用rnn的sigmoid激活函数
min_max_scaler = MinMaxScaler()
ytr1 = min_max_scaler.fit_transform(ytr1)*100
ytr2 =  min_max_scaler.fit_transform(ytr2)*100
yte3 =  min_max_scaler.fit_transform(yte3)*100
yte4 =  min_max_scaler.fit_transform(yte4)*100
yte5 =  min_max_scaler.fit_transform(yte5)*100
yte6 =  min_max_scaler.fit_transform(yte6)*100
yte7 =  min_max_scaler.fit_transform(yte7)*100



# %% 6、数据分组，方便训练与测试
xtr_b1= np.vstack((xtr2,xte3,xte4,xte5,xte6,xte7))
# xtr = xtr.reshape(-1,2560,1)
print(xtr_b1.shape)

ytr_b1 = np.vstack((ytr2,yte3,yte4,yte5,yte6,yte7))
print(ytr_b1.shape)

#将1_2为测试集
xtr_b2= np.vstack((xtr1,xte3,xte4,xte5,xte6,xte7))
print(xtr_b2.shape)

ytr_b2 = np.vstack((ytr1,yte3,yte4,yte5,yte6,yte7))
print(ytr_b2.shape)

#将1_3为测试集
xtr_b3= np.vstack((xtr1,xtr2,xte4,xte5,xte6,xte7))
print(xtr_b3.shape)

ytr_b3 = np.vstack((ytr1,ytr2,yte4,yte5,yte6,yte7))
print(ytr_b3.shape)


#将1_4为测试集
xtr_b4= np.vstack((xtr2,xte3,xte4,xte5,xte6,xte7))
print(xtr_b4.shape)

ytr_b4 = np.vstack((ytr2,yte3,yte4,yte5,yte6,yte7))
print(ytr_b4.shape)

#将1_5为测试集
xtr_b5= np.vstack((xtr1,xtr2,xte3,xte4,xte6,xte7))
print(xtr_b5.shape)

ytr_b5 = np.vstack((ytr1,ytr2,yte3,yte4,yte6,yte7))
print(ytr_b5.shape)


#将1_6为测试集
xtr_b6= np.vstack((xtr1,xtr2,xte3,xte4,xte5,xte7))
print(xtr_b6.shape)

ytr_b6 = np.vstack((ytr1,ytr2,yte3,yte4,yte5,yte7))
print(ytr_b6.shape)

#将1_7为测试集其余做训练集
xtr_b7= np.vstack((xtr1,xtr2,xte3,xte4,xte5,xte6))
print(xtr_b7.shape)

ytr_b7 = np.vstack((ytr1,ytr2,yte3,yte4,yte5,yte6))
print(ytr_b7.shape)

# In[29]:
train_data_list = [(xtr_b1,ytr_b1,xtr1,ytr1),(xtr_b2,ytr_b2,xtr2,ytr2),(xtr_b3,ytr_b3,xte3,yte3),(xtr_b4,ytr_b4,xte4,yte4),(xtr_b5,ytr_b5,xte5,yte5),(xtr_b6,ytr_b6,xte6,yte6),(xtr_b7,ytr_b7,xte7,yte7)]