pandas 查询写入

cooolr 于 2022-08-24 发布

安装

pip install pandas

写入csv

import pandas as pd

df=pd.DataFrame([["A", "B", "C", "D", "E", "F", "G"],[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]])

df.to_csv('Result1.csv', index=None,header=None, encoding="utf-8")

写入xlsx

import pandas as pd

df=pd.DataFrame([["A", "B", "C", "D", "E", "F", "G"],[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]])

writer = pd.ExcelWriter("Result.xlsx", engine = "xlsxwriter")

df.to_excel(writer, index=None, header=None, encoding="utf-8")

writer.save()

写入多个sheet

import pandas as pd

df1=pd.DataFrame([["A", "B", "C", "D", "E", "F", "G"],[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]])
df2=pd.DataFrame([["A", "B", "C", "D", "E", "F", "G"],[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]])

writer = pd.ExcelWriter('output.xlsx')
df1.to_excel(writer,'Sheet1',index=None,header=None, encoding="utf-8")
df2.to_excel(writer,'Sheet2',index=None,header=None, encoding="utf-8")
writer.save()

group by

valid_data.groupby(valid_data["label"]).size()

读取excel

df = pd.read_excel("2017-12-31 ~ 2018-12-31.xlsx")
news_id_list = df["id"].tolist()

遍历

for i in list(df.itertuples())[1:]:
    print(i)
    break

行操作

# 去掉前面5行
df = df.iloc[5:]

# 去掉前面3行
df = df.iloc[3:]

# 将第四行设置为header
new_header = df.iloc[0]  # 获取第四行作为新的列标签
df = df[1:]  # 去掉第四行
df.columns = new_header  # 设置新的列标签

列操作

# 合并前两列为一列
df['交易时间'] = df['交易时间'].astype(str)
df['交易时间'] = df['交易时间'].str.split().str[0]
df['微信支付单号'] = df['微信支付单号'].astype(str)
df["交易时间"] = df["交易时间"] + " " + df["微信支付单号"]
df = df.drop(columns=["微信支付单号"])
# 将后面的列标签向后移动一位
headers = df.columns.tolist()
headers = [headers[0]] +["微信支付单号"] + headers[1:-1]
df.columns = headers

过滤掉收入为空的行

df['收入'] = df['收入'].replace(' ', np.nan)
df = df.dropna(subset=['收入'])