本文共 1976 字,大约阅读时间需要 6 分钟。
# -*- coding: utf-8 -*-
# Quick tour of core pandas operations: object creation, viewing, selection,
# assignment, missing data, statistics, string methods, concat and merge.
# Runs top to bottom as a flat script and prints the result of every step.
# The random data is not seeded, so printed values differ between runs.
import numpy as np
import pandas as pd

# --- Object creation -------------------------------------------------------
s = pd.Series([1, 3, 5, np.nan, 6, 8])
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))

# Build a DataFrame from a dict of column specs. The Series in column 'C' is
# aligned to the frame's index by label, so both must share the same labels;
# a mismatched (e.g. random) index would turn 'C' into NaN.
row_labels = list(range(4))
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20210123'),
        'C': pd.Series(1., index=row_labels, dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    },
    index=row_labels,
)

# --- Viewing data ----------------------------------------------------------
# Convert the underlying data to a NumPy array.
print(df.to_numpy())
# Sort by index labels (axis=0) or by column labels (axis=1).
print(df.sort_index(axis=0, ascending=True))
# Sort by the values of a column.
print(df.sort_values(by='B', ascending=False))

# --- Selection -------------------------------------------------------------
# Plain [] selects a column by name or slices rows; .loc selects by label.
# The date slice must fall inside `dates` (2013-01-01 .. 2013-01-06),
# otherwise it silently yields an empty frame.
print(
    df['A'],
    df[0:3],
    df['20130102':'20130104'],
    df.loc[dates[0]],
    df.loc[:, ['A', 'B']],
    df.loc[dates[0:2], ['A', 'B']],
    df.loc[dates[0], 'A'],
    sep='\n',
)
# .iloc selects by integer position.
print(
    df.iloc[0],
    df.iloc[3:5, 0:2],
    df.iloc[[1, 2, 4], [0, 2]],
    df.iloc[:, 1:3],
    df.iloc[1, 1],
    sep='\n',
)
# Boolean indexing: filter rows by a column condition, or mask element-wise.
print(df[df['A'] > 0], df[df > 0], sep='\n')
# isin() filtering, analogous to the `in` operator.
df['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
print(df[df['E'].isin(['two', 'four'])])

# --- Assignment ------------------------------------------------------------
# Set a value by label.
df.loc[dates[0], 'A'] = 0
# Set a value by position.
df.iloc[1, 0] = 0
# Set a whole column from a NumPy array.
df.loc[:, 'D'] = np.array([5.] * len(df))
# Drop a column (returns a new frame, hence the rebinding).
df = df.drop(columns=['E'])

# --- Missing data ----------------------------------------------------------
# Reindexing returns a copy; the newly added column 'E' starts out as NaN.
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
df1.loc[dates[0]:dates[1], 'E'] = 1
# The calls below return new objects and leave df1 untouched, so their
# results are printed rather than silently discarded.
# Drop every row that contains at least one missing value.
print(df1.dropna(how='any'))
# Fill missing values with a constant.
print(df1.fillna(value=5))
# Boolean mask marking the NaN cells.
print(pd.isna(df1))

# --- Statistics and function application -----------------------------------
# Mean per column and per row; NaN is skipped by default.
print(df1.mean(), df1.mean(axis=1), sep='\n')
# Element-wise subtraction with explicit alignment on the index.
print(df1.sub(df1 * 0.1, axis='index'))
# Apply a function to each column.
print(df1.apply(lambda x: x.max() - x.min()))
# Histogramming: count occurrences of each value in the first row.
s = pd.DataFrame(np.random.randint(0, 7, (2, 10)))
print(s.loc[0].value_counts())

# --- String methods --------------------------------------------------------
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
print(s.str.lower())

# --- Combining -------------------------------------------------------------
# Concatenate two columns, then two rows, end to end.
print(pd.concat([df1["A"], df1['D']]))
print(pd.concat([df1.loc[dates[0]], df1.loc[dates[2]]]))
# SQL-style join on a key column.
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})
print(pd.merge(left, right, on='key'))
转载地址:http://ehben.baihongyu.com/