>>> import pandas as pd
>>> multicol = pd.MultiIndex.from_tuples([('weight', 'kg'), ('weight', 'pounds')])
>>> obj = pd.DataFrame([[1, 2], [2, 4]], index=['cat', 'dog'], columns=multicol)
>>> obj
    weight
        kg pounds
cat      1      2
dog      2      4
>>>
>>> obj.stack()
            weight
cat kg           1
    pounds       2
dog kg           2
    pounds       4
缺失值填充:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
>>> import pandas as pd
>>> multicol = pd.MultiIndex.from_tuples([('weight', 'kg'), ('height', 'm')])
>>> obj = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], index=['cat', 'dog'], columns=multicol)
>>> obj
    weight height
        kg      m
cat    1.0    2.0
dog    3.0    4.0
>>>
>>> obj.stack()
        height  weight
cat kg     NaN     1.0
    m      2.0     NaN
dog kg     NaN     3.0
    m      4.0     NaN
>>> import pandas as pd
>>> multicol = pd.MultiIndex.from_tuples([('weight', 'kg'), ('height', 'm')])
>>> obj = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], index=['cat', 'dog'], columns=multicol)
>>> obj
    weight height
        kg      m
cat    1.0    2.0
dog    3.0    4.0
>>>
>>> obj.stack(level=0)
             kg    m
cat height  NaN  2.0
    weight  1.0  NaN
dog height  NaN  4.0
    weight  3.0  NaN
>>>
>>> obj.stack(level=1)
        height  weight
cat kg     NaN     1.0
    m      2.0     NaN
dog kg     NaN     3.0
    m      4.0     NaN
>>>
>>> obj.stack(level=[0, 1])
cat  height  m     2.0
     weight  kg    1.0
dog  height  m     4.0
     weight  kg    3.0
dtype: float64
>>> import pandas as pd
>>> multicol = pd.MultiIndex.from_tuples([('weight', 'kg'), ('height', 'm')])
>>> obj = pd.DataFrame([[None, 1.0], [2.0, 3.0]], index=['cat', 'dog'], columns=multicol)
>>> obj
    weight height
        kg      m
cat    NaN    1.0
dog    2.0    3.0
>>>
>>> obj.stack(dropna=False)
        height  weight
cat kg     NaN     NaN
    m      1.0     NaN
dog kg     NaN     2.0
    m      3.0     NaN
>>>
>>> obj.stack(dropna=True)
        height  weight
cat m      1.0     NaN
dog kg     NaN     2.0
    m      3.0     NaN
>>> import pandas as pd
>>> obj = pd.Series([1, 2, 3, 4], index=pd.MultiIndex.from_product([['one', 'two'], ['a', 'b']]))
>>> obj
one  a    1
     b    2
two  a    3
     b    4
dtype: int64
>>>
>>> obj.unstack()
     a  b
one  1  2
two  3  4
>>>
>>> obj.unstack(level=0)
   one  two
a    1    3
b    2    4
和 stack 方法类似,如果值不存在将会引入缺失值(NaN):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
>>> import pandas as pd
>>> obj1 = pd.Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])
>>> obj2 = pd.Series([4, 5, 6], index=['c', 'd', 'e'])
>>> obj3 = pd.concat([obj1, obj2], keys=['one', 'two'])
>>> obj3
one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64
>>>
>>> obj3.unstack()
       a    b    c    d    e
one  0.0  1.0  2.0  3.0  NaN
two  NaN  NaN  4.0  5.0  6.0
>>> import pandas as pd
>>> import numpy as np
>>> obj = pd.DataFrame(np.arange(6).reshape((2, 3)), index=pd.Index(['Ohio','Colorado'], name='state'), columns=pd.Index(['one', 'two', 'three'], name='number'))
>>> obj
number    one  two  three
state
Ohio        0    1      2
Colorado    3    4      5
>>>
>>> obj2 = obj.stack()
>>> obj2
state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32
>>>
>>> obj3 = pd.DataFrame({'left': obj2, 'right': obj2 + 5}, columns=pd.Index(['left', 'right'], name='side'))
>>> obj3
side             left  right
state    number
Ohio     one        0      5
         two        1      6
         three      2      7
Colorado one        3      8
         two        4      9
         three      5     10
>>>
>>> obj3.unstack('state')
side   left          right
state  Ohio Colorado  Ohio Colorado
number
one       0        3     5        8
two       1        4     6        9
three     2        5     7       10
>>>
>>> obj3.unstack('state').stack('side')
state         Colorado  Ohio
number side
one    left          3     0
       right         8     5
two    left          4     1
       right         9     6
three  left          5     2
       right        10     7
>>> import numpy as np
>>> import pandas as pd
>>> obj = pd.DataFrame({'data1' : ['a'] * 4 + ['b'] * 4, 'data2' : np.random.randint(0, 4, 8)})
>>> obj
  data1  data2
0     a      2
1     a      1
2     a      1
3     a      2
4     b      1
5     b      2
6     b      0
7     b      0
>>>
>>> obj.drop_duplicates()
  data1  data2
0     a      2
1     a      1
4     b      1
5     b      2
6     b      0
>>>
>>> obj.drop_duplicates(subset='data2')
  data1  data2
0     a      2
1     a      1
6     b      0
>>>
>>> obj.drop_duplicates(subset='data2', ignore_index=True)
  data1  data2
0     a      2
1     a      1
2     b      0
>>> import pandas as pd
>>> obj = pd.DataFrame({'A': [0, 1, 2, 3, 4], 'B': [5, 6, 7, 8, 9], 'C': ['a', 'b', 'c', 'd', 'e']})
>>> obj
   A  B  C
0  0  5  a
1  1  6  b
2  2  7  c
3  3  8  d
4  4  9  e
>>>
>>> obj.replace(0, 5)
   A  B  C
0  5  5  a
1  1  6  b
2  2  7  c
3  3  8  d
4  4  9  e
>>>
>>> obj.replace({0: 10, 1: 100})
     A  B  C
0   10  5  a
1  100  6  b
2    2  7  c
3    3  8  d
4    4  9  e
>>>
>>> obj.replace({'A': 0, 'B': 5}, 100)
     A    B  C
0  100  100  a
1    1    6  b
2    2    7  c
3    3    8  d
4    4    9  e
>>> obj.replace({'A': {0: 100, 4: 400}})
     A  B  C
0  100  5  a
1    1  6  b
2    2  7  c
3    3  8  d
4  400  9  e
>>> import pandas as pd
>>> obj = pd.DataFrame({'A': ['bat', 'foo', 'bait'], 'B': ['abc', 'bar', 'xyz']})
>>> obj
      A    B
0   bat  abc
1   foo  bar
2  bait  xyz
>>>
>>> obj.replace(to_replace=r'^ba.$', value='new', regex=True)
      A    B
0   new  abc
1   foo  new
2  bait  xyz
>>>
>>> obj.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
      A    B
0   new  abc
1   foo  bar
2  bait  xyz
>>>
>>> obj.replace(regex=r'^ba.$', value='new')
      A    B
0   new  abc
1   foo  new
2  bait  xyz
>>>
>>> obj.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
      A    B
0   new  abc
1   xyz  new
2  bait  xyz
>>>
>>> obj.replace(regex=[r'^ba.$', 'foo'], value='new')
      A    B
0   new  abc
1   new  new
2  bait  xyz