pd的默认索引是从零开始的数字,把一列设置为新的索引可以更便于操作
有的表格可能没有header,而pandas默认把第一行当作header(列名),这种情况下第一行数据会被当成列名、不会出现在数据里,需要指定header=None
# header=None: the file has no header row, so keep the first row as data and
# let pandas number the columns 0..n-1.
data2 = pd.read_csv("test.csv", header=None)
# Promote the '故障发生时间' column to the index so rows can be selected by time.
df = data.set_index('故障发生时间')
# Date-string slicing is only dependable on a sorted DatetimeIndex: parse the
# labels (already-parsed datetimes pass through unchanged) and sort them.
df.index = pd.to_datetime(df.index)
df = df.sort_index()
# Label-based slice: both end points are inclusive, and on a DatetimeIndex the
# end date '2020-03-29' covers that whole day.
df1 = df.loc['2020-03-01':'2020-03-29']
index 和 columns 索引互转
# For every key tuple, take the first matching alarm row from each Excel file
# and stack all of those rows into df2.
# NOTE(review): file_list, BASE, one, two, three, five, four and index are not
# defined in this snippet — presumably they come from earlier code; confirm.
df2 = pd.DataFrame()
matched_rows = []
# 'wa' is not a valid ExcelWriter mode and xlsxwriter cannot append to an
# existing workbook, so open the file in plain write mode.
# NOTE(review): nothing in this snippet writes to or closes `writer` — confirm
# that later code does, or wrap its use in a `with` block.
writer = pd.ExcelWriter('hello.xlsx', engine='xlsxwriter', mode='w')
for excel in file_list:
    alarm_data = pd.read_excel(os.path.join(BASE, excel))
    # Normalise province names by stripping a trailing '省' and then '市'.
    alarm_data.loc[:, '省份'] = alarm_data['省份'].apply(lambda x: str(x).rstrip('省').rstrip('市'))
    aone = alarm_data['网管告警id']
    atwo = alarm_data['省份']
    athree = alarm_data['地市']
    afour = alarm_data['告警清除时间']
    afive = alarm_data['告警标题']
    # Each tuple is (one, two, three, five, four, index): position 3 is compared
    # with afive and position 4 with afour; the trailing index value is not
    # used for matching.
    for line in zip(one, two, three, five, four, index):
        fil = alarm_data[
            (aone == line[0]) & (atwo == line[1]) & (athree == line[2])
            & (afive == line[3]) & (afour == line[4])
        ]
        # A filter with no match yields an empty DataFrame, so test .empty.
        # np.any(fil) asks whether any *cell* is truthy, which is a different
        # question, and `any` over datetime columns is deprecated in pandas 2.x.
        if not fil.empty:
            # Keep only the first matching row.
            matched_rows.append(fil.iloc[:1])
# A single concat at the end avoids re-copying df2 on every match.
if matched_rows:
    df2 = pd.concat(matched_rows)
print(df2)
np.any / np.all 和 Python 内置的 any / all 作用类似,但内置函数只能处理一维的可迭代对象,而 NumPy 版本可以处理多维数组(还可以用 axis 指定维度),在处理一些返回结果可能为空的情况时很好用;如果只是判断 DataFrame 是否为空,直接用 df.empty 更稳妥
# 避免出现模棱两可的情况
# https://blog.csdn.net/weixin_39449466/article/details/81008505
# any 可迭代对象,有一个为True,则返回True,any([])返回False,可迭代对象若出现0,表示False
# Each tuple is (one, two, three, five, four, index): position 3 is compared
# with afive and position 4 with afour; the trailing index value is not used.
for line in zip(one, two, three, five, four, index):
    fil = alarm_data[
        (aone == line[0]) & (atwo == line[1]) & (athree == line[2])
        & (afive == line[3]) & (afour == line[4])
    ]
    # Boolean filtering returns an empty DataFrame (never None) when nothing
    # matches, so test .empty. np.any(fil) asks whether any *cell* is truthy,
    # which is a different question, and `any` over datetime columns is
    # deprecated in pandas 2.x.
    if not fil.empty:
        # Keep only the first matching row.
        df2 = pd.concat([df2, fil.iloc[:1]])
# all 可迭代对象只有全为True才返回True,特例是np.all([]) 返回True(空输入没有任何False元素)
np.all([])
True