Data Structures

Pivot

df.pivot(index='date', columns='type', calues'value')
# idx/(date, type, value) -> date/type

Melt

pd.melt(df, id_vars=['date'], value_vars=['week_1', 'week_2'], value_name='value')

Indexing

df.loc[:, (df>1).any()] # Select cols with any vals > 1
df.loc[:, (df>1).all()] # Select cols with vals > 1
 
df.loc[:, df.isnull().any()] # Select cols with NaN
df.loc[:, df.notnull().all()] # Select cols without NaN

Grouping Data

df.groupby(by=['date', 'type']).mean()

Missing Data

df.dropna(axis=0) # axis=0: rows, axis=1: columns
df.fillna(0)
df.replace('Null', 0)

Combining Data

pd.merge(df_1, df_1, how='left', on='key')
df_1.join(df_2, how='left', on='key')
 
# Concat
df_1.append(df_2) # Vertical
 
pd.concat([df_1, df_2], axis=0) # Vertical
pd.concat([df_1, df_2], axis=1) # Horizontal

Performance Comparison

Apply Function

methodtime
for i, row in df.iterrows():7103
for i in range(len(df)):6219
df.apply(func, axis=1)3476
func_np(df) (vectorization with numpy array)48
func(df) (vectorizatioin with pandas series)1

Select Values (isin)

methodtime
df = df[[x in key_series for x in df['col']]]71
df = df[df['col'].apply(lambda x: x in key_series)]55
df = df.merge(key_series, how='inner', on='col')4.5
df = df[df.isin({'col':key_series})['col']]3.3
df = df.query('col in @key_series')2.8
df = df[np.isin(df['col'], key_series)]1