2016-03-04 246 views




> df1.merge(df2, right_index=1, left_index=1, suffixes=("_1", "_2")) 
   a_1  b_1  a_2  b_2
0    1    2  1.1  2.1
1    3    4  3.1  4.1




def cmp(df1, df2, topn=10):
    """Return the first ``topn`` rows of two DataFrames joined side by side.

    Each frame has its index reset, which moves the previous index into a
    regular column and leaves a fresh positional index. The frames are then
    joined on that positional index, with a ``'sep'`` column holding ``'-'``
    placed between them as a visual divider.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames to compare; ``df1`` ends up on the left, ``df2`` on the right.
    topn : int, default 10
        Number of leading rows taken from each frame.

    Returns
    -------
    pandas.DataFrame
        Columns of ``df1``, then ``'sep'``, then columns of ``df2``. Column
        names shared by both sides receive the ``_L`` / ``_R`` suffixes.
        The joins are inner joins, so the row count is limited by the
        shorter of the two truncated frames.
    """
    left = df1.reset_index().head(n=topn)
    right = df2.reset_index().head(n=topn)

    # One '-' per requested row; rows beyond the data are dropped by the
    # inner join below.
    span = pd.DataFrame(data=[('-',) for _ in range(topn)], columns=['sep'])

    # merge() validates left_index/right_index as real booleans and raises
    # ValueError for integers such as 1, so pass True explicitly.
    left = left.merge(span, left_index=True, right_index=True)
    return left.merge(
        right, left_index=True, right_index=True, suffixes=('_L', '_R')
    )


def side_by_side(*objs, **kwds):
    """Print the reprs of several objects next to each other.

    Each object's ``repr`` is split into lines and the resulting line
    lists are laid out as adjacent columns using pandas' ``adjoin``.

    Parameters
    ----------
    *objs
        Objects to display, left to right.
    **kwds
        Only ``space`` is read: the padding passed to ``adjoin`` between
        columns (default 4).
    """
    from pandas.io.formats.printing import adjoin

    gap = kwds.get('space', 4)
    columns = []
    for item in objs:
        columns.append(repr(item).split('\n'))
    print(adjoin(gap, *columns))

# Build a test case made of two DataFrames
import pandas as pd 
import numpy as np 

# Shape shared by both frames: n rows, p columns.
n, p = 10, 3

# Row and column labels: a fixed prefix followed by the position.
index_rowA = ['rA' + str(i) for i in range(n)]
index_colA = ['cA' + str(j) for j in range(p)]

index_rowB = ['rB' + str(i) for i in range(n)]
index_colB = ['cB' + str(j) for j in range(p)]

# Build frames A and B, each filled with uniform random values.
dfA = pd.DataFrame(np.random.rand(n, p), index=index_rowA, columns=index_colA)
dfB = pd.DataFrame(np.random.rand(n, p), index=index_rowB, columns=index_colB)


          cA0       cA1       cA2              cB0       cB1       cB2
rA0  0.708763  0.665374  0.718613    rB0  0.320085  0.677422  0.722697
rA1  0.120551  0.277301  0.646337    rB1  0.682488  0.273689  0.871989
rA2  0.372386  0.953481  0.934957    rB2  0.015203  0.525465  0.223897
rA3  0.456871  0.170596  0.501412    rB3  0.941295  0.901428  0.329489
rA4  0.049491  0.486030  0.365886    rB4  0.597779  0.201423  0.010794
rA5  0.277720  0.436428  0.533683    rB5  0.701220  0.261684  0.502301
rA6  0.391705  0.982510  0.561823    rB6  0.182609  0.140215  0.389426
rA7  0.827597  0.105354  0.180547    rB7  0.041009  0.936011  0.613592
rA8  0.224394  0.975854  0.089130    rB8  0.697824  0.887613  0.972838
rA9  0.433850  0.489714  0.339129    rB9  0.263112  0.355122  0.447154