安装
pip install pandas
写入csv
import pandas as pd
df=pd.DataFrame([["A", "B", "C", "D", "E", "F", "G"],[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]])
df.to_csv('Result1.csv', index=None,header=None, encoding="utf-8")
写入xlsx
import pandas as pd
df=pd.DataFrame([["A", "B", "C", "D", "E", "F", "G"],[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]])
writer = pd.ExcelWriter("Result.xlsx", engine = "xlsxwriter")
df.to_excel(writer, index=None, header=None, encoding="utf-8")
writer.save()
写入多个sheet
import pandas as pd
df1=pd.DataFrame([["A", "B", "C", "D", "E", "F", "G"],[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]])
df2=pd.DataFrame([["A", "B", "C", "D", "E", "F", "G"],[1,2,3,4,5,6,7],[7,6,5,4,3,2,1]])
writer = pd.ExcelWriter('output.xlsx')
df1.to_excel(writer,'Sheet1',index=None,header=None, encoding="utf-8")
df2.to_excel(writer,'Sheet2',index=None,header=None, encoding="utf-8")
writer.save()
group by
valid_data.groupby(valid_data["label"]).size()
读取excel
df = pd.read_excel("2017-12-31 ~ 2018-12-31.xlsx")
news_id_list = df["id"].tolist()
for i in list(df.itertuples())[1:]:
print(i)
break
行操作
# 去掉前面5行
df = df.iloc[5:]
# 去掉前面3行
df = df.iloc[3:]
# 将第四行设置为header
new_header = df.iloc[0] # 获取第四行作为新的列标签
df = df[1:] # 去掉第四行
df.columns = new_header # 设置新的列标签
列操作
# 合并前两列为一列
df['交易时间'] = df['交易时间'].astype(str)
df['交易时间'] = df['交易时间'].str.split().str[0]
df['微信支付单号'] = df['微信支付单号'].astype(str)
df["交易时间"] = df["交易时间"] + " " + df["微信支付单号"]
df = df.drop(columns=["微信支付单号"])
# 将后面的列标签向后移动一位
headers = df.columns.tolist()
headers = [headers[0]] +["微信支付单号"] + headers[1:-1]
df.columns = headers
过滤掉收入为空的行
df['收入'] = df['收入'].replace(' ', np.nan)
df = df.dropna(subset=['收入'])