import pandas as pd
import numpy as np


# Build a DataFrame from a rectangular nested list (3 inner lists of 4 items).
list_2dim = [
    [1, 2, 3.5, 4],
    ['a', 'b', 'c', 'd'],
    [0.1, 3, 0.5, 8],
]


df_2dim = pd.DataFrame(data=list_2dim)

df_2dim


# Inner lists of unequal length: the shorter rows end up padded with NaN
# out to the width of the longest row.
len_list = [
    [1, 2, 3, 4, 5],
    ['a', 'b'],
    [0.1, 0.2, 0.5],
]


df_len = pd.DataFrame(data=len_list)

df_len


# Build a DataFrame from a dict of equal-length lists; the keys are used as
# the column labels.
my_dict = {
    'a': [10, 20, 30, 40],
    'b': [1, 2, 3, 4],
    'c': [5, 6, 7, 8],
}

df_dict = pd.DataFrame(data=my_dict)

print(type(df_dict))
df_dict

<class 'pandas.core.frame.DataFrame'>


# A dict whose lists have different lengths (1, 4 and 3 items).
defi_dict = {
    'a': [10],
    'b': [1, 2, 3, 4],
    'c': [5, 6, 7],
}

defi_dict

{'a': [10], 'b': [1, 2, 3, 4], 'c': [5, 6, 7]}


# Kept commented out on purpose: the lists in defi_dict have lengths 1, 4 and 3,
# and building a DataFrame from them fails with the error recorded below.
# defi_df = pd.DataFrame(defi_dict)

# ValueError: All arrays must be of the same length


list_2dim

[[1, 2, 3.5, 4], ['a', 'b', 'c', 'd'], [0.1, 3, 0.5, 8]]


# Supply explicit row labels (index=) and column labels (columns=) while
# constructing the DataFrame from the nested list.
df_index = pd.DataFrame(list_2dim,
                   index=['r1', 'r2', 'r3'],
                   columns=['c1', 'c2', 'c3', 'c4'])

df_index


my_dict

{'a': [10, 20, 30, 40], 'b': [1, 2, 3, 4], 'c': [5, 6, 7, 8]}


# With a dict source, columns= picks the keys to use and their order ('c', 'b', 'a');
# index= supplies the row labels 'r', 'o', 'w', 's'.
df_order = pd.DataFrame(my_dict, index=list('rows'), columns=list('cba'))

df_order


# 'd' is requested as a column but is not a key of my_dict.
# NOTE(review): pandas is expected to create that column filled with NaN — confirm.
new_df = pd.DataFrame(my_dict, columns=list('abcd'))

new_df


my_dict

{'a': [10, 20, 30, 40], 'b': [1, 2, 3, 4], 'c': [5, 6, 7, 8]}


# df_item = pd.DataFrame(my_dict, index=list('qwerty'))

# ValueError: Length of values (4) does not match length of index (6)


# Sample table: one column per region, three values each.
pop_data = {
    '서울': [150, 180, 300],
    '경기': [200, 240, 450],
    '충청': [-10, 3, -13],
    '경상': [10, 20, 30],
    '전라': [5, 6, 7],
}

pop_sample = pd.DataFrame(data=pop_data)
pop_sample


# Replace the default row labels with explicit years.
year_list = [2016, 2017, 2018]
pop_sample.index = year_list

pop_sample


# Give the index axis itself a name.
pop_sample.index.name = 'year'

pop_sample


pop_sample.columns

Index(['서울', '경기', '충청', '경상', '전라'], dtype='object')


# Name the columns axis.
pop_sample.columns.name = 'location'

pop_sample


# Assigning a new list replaces the whole index; the .axes output further down
# shows the new index without the earlier 'year' name.
pop_sample.index = [1998, 1999, 2000]

pop_sample


# rename() returns a relabelled copy; the result is not assigned here, so
# pop_sample keeps its 1998 label (see the .axes output below).
pop_sample.rename({1998:1990})


pop_sample


# axis=1 applies the mapping to column labels instead of row labels.
# Again the result is discarded, so the '전라' column keeps its name.
pop_sample.rename({"전라":"제주"}, axis=1)


# List holding the row index and the column index.
pop_sample.axes

[Int64Index([1998, 1999, 2000], dtype='int64'),
 Index(['서울', '경기', '충청', '경상', '전라'], dtype='object', name='location')]


# reset_index() is called without assignment, so pop_sample itself is unchanged
# (later cells still address rows by 1998/1999/2000).
pop_sample.reset_index()


pop_sample


# NOTE(review): per the pandas docs, drop=True discards the old index instead
# of inserting it as a column — confirm. The result is again not assigned.
pop_sample.reset_index(drop=True)


pop_sample


# The underlying data as a 2-D NumPy array (rows x columns).
pop_sample.values

array([[150, 200, -10,  10,   5],
       [180, 240,   3,  20,   6],
       [300, 450, -13,  30,   7]], dtype=int64)


# dtype of each column, keyed by column label.
pop_sample.dtypes

location
서울    int64
경기    int64
충청    int64
경상    int64
전라    int64
dtype: object


# Total number of cells (3 rows x 5 columns = 15 here).
pop_sample.size

15


# len() of a DataFrame is its number of rows.
len(pop_sample)

3


# (number of rows, number of columns).
pop_sample.shape

(3, 5)


# Transpose: rows and columns swap places.
trans_df = pop_sample.T

trans_df


# The former column labels (with their 'location' name) are now the index.
trans_df.index

Index(['서울', '경기', '충청', '경상', '전라'], dtype='object', name='location')


# The former row labels (1998, 1999, 2000) are now the columns.
trans_df.columns

Int64Index([1998, 1999, 2000], dtype='int64')


# Bracket access with a single column label returns that column as a Series.
pop_sample['서울']

1998    150
1999    180
2000    300
Name: 서울, dtype: int64


# Attribute-style access to the same column; gives the same Series as pop_sample['서울'].
pop_sample.서울

1998    150
1999    180
2000    300
Name: 서울, dtype: int64


# get() returns the same column Series.
# NOTE(review): per the pandas docs it returns a default (None) instead of
# raising when the key is missing — confirm.
pop_sample.get("서울")

1998    150
1999    180
2000    300
Name: 서울, dtype: int64


pop_sample


# iloc[0] selects the first row by position; the result is a Series whose
# name is that row's label (1998).
print(type(pop_sample.iloc[0]))
print(pop_sample.iloc[0].name)

pop_sample.iloc[0]

<class 'pandas.core.series.Series'>
1998

location
서울    150
경기    200
충청    -10
경상     10
전라      5
Name: 1998, dtype: int64


# loc selects a row by its label rather than its position.
pop_sample.loc[1999]

location
서울    180
경기    240
충청      3
경상     20
전라      6
Name: 1999, dtype: int64


# A list of labels inside [] selects several columns at once.
pop_sample[['서울', '경기']]


# Columns first, then one row by label: the result is a Series for 1999.
pop_sample[['경기', '경상']].loc[1999]

location
경기    240
경상     20
Name: 1999, dtype: int64


# Row first, then the two labels: same values as selecting the columns first.
pop_sample.loc[1999][['경기', '경상']]

location
경기    240
경상     20
Name: 1999, dtype: int64


# A list of row labels inside loc selects several rows.
pop_sample.loc[[1998, 2000]]


# Rows first, then a single column: a Series indexed by the chosen row labels.
pop_sample.loc[[1998, 2000]]['충청']

1998   -10
2000   -13
Name: 충청, dtype: int64


# Column first, then the row labels: same values as selecting the rows first.
pop_sample['충청'].loc[[1998, 2000]]

1998   -10
2000   -13
Name: 충청, dtype: int64


# A bare label (no list) returns the column as a Series.
pop_sample['충청']

1998   -10
1999     3
2000   -13
Name: 충청, dtype: int64


# NOTE(review): a one-element list is expected to keep the result a
# one-column DataFrame rather than a Series — confirm.
pop_sample[['충청']]


# Series, then a single row label: a scalar value.
pop_sample['충청'].loc[1999]

3


# Lists on both sides, presumably so the result stays a DataFrame — confirm.
pop_sample[['충청']].loc[[1999]]


# A slice inside plain [] selects rows.
pop_sample[0:2]


# start:stop:step, i.e. every second row among the first three.
pop_sample[0:3:2]


# NOTE(review): with an integer index, an integer slice inside plain [] appears
# to be positional, not label-based. If so, this returns every row because 2000
# exceeds the row count, not because 2000 is the last label — confirm; use
# pop_sample.loc[:2000] for an explicit label slice.
pop_sample[:2000]


# Negative step: rows in reverse order.
pop_sample[::-1]


# Label slice over columns via loc.
# NOTE(review): per the pandas docs, label slices include both endpoints — confirm.
pop_sample.loc[:, '서울':'경기']


# 4x4 array of floating-point zeros.
zr_data = np.zeros(shape=(4, 4))

zr_data

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])


# Wrap the zero array in a DataFrame with default integer labels.
zr_df = pd.DataFrame(zr_data)

zr_df


# Chained [] slices both act on rows: first 3 rows, then the first 2 of those.
# The result keeps all four columns; this is NOT a rows-by-columns slice.
zr_df[:3][:2]


# iloc takes (row slice, column slice): first 3 rows and first 2 columns.
zr_df.iloc[:3, :2]


# Practice exercise: four Series holding different kinds of values become the
# columns of a single DataFrame.
Col1 = pd.Series([0, 3, 'ks01', 2, 5])
Col2 = pd.Series(["big", "data", "is", "very", "good"])
Col3 = pd.Series([2.7, -5.0, 2.12, 8.31, -1.34])
Col4 = pd.Series([True, True, False, False, True])

col_list = [Col1, Col2, Col3, Col4]


# Pair each column label with the Series at the same position in col_list.
df_prac = pd.DataFrame(dict(zip(['Col1', 'Col2', 'Col3', 'Col4'], col_list)))

df_prac


# Swap the default integer row labels for letters.
df_prac.index = list("ABCDE")

df_prac


# Two columns by label.
df_prac[['Col1', 'Col3']]


# Three rows by label.
df_prac.loc[['A', 'C', 'D']]


# Columns first, then rows.
df_prac[['Col1', 'Col2']].loc[['B', 'D']]


# Assigning a scalar to a new column label creates the column with that value
# in every row.
pop_sample['제주'] = 1

pop_sample


print(len(pop_sample))

# New column of random integers drawn from [1, 10), one per existing row.
pop_sample['부산'] = np.random.randint(1, 10, len(pop_sample))

pop_sample

3


# New column computed row by row as the sum of two existing columns.
pop_sample['수도권'] = pop_sample['서울'] + pop_sample['경기']

pop_sample


# Series labelled with only two of the three years (1999 is absent).
pop_sr = pd.Series(data=[10, -10], index=[1998, 2000])

pop_sr

1998    10
2000   -10
dtype: int64


# Assigning a Series aligns it on row labels: 1998 and 2000 receive values,
# while 1999 (missing from pop_sr) becomes NaN.
pop_sample["강원"] = pop_sr

pop_sample


# Series with the default integer labels 0, 1, 2.
no_label_sr = pd.Series(data=[100, 200, 300])

no_label_sr

0    100
1    200
2    300
dtype: int64


# The Series labels (0, 1, 2) match none of the row labels (1998-2000), so
# the new 'test' column is NaN in every existing row.
pop_sample['test'] = no_label_sr

pop_sample


# Assigning to a row label that does not exist yet appends a new row; the
# scalar 0 fills every column of it.
pop_sample.loc[2001] = 0

pop_sample


pop_sample.shape

(4, 10)


# New row from an array of 10 random integers in [-100, 100); 10 matches the
# column count reported by pop_sample.shape above.
pop_sample.loc[2002] = np.random.randint(-100, 100, 10)

pop_sample


# New row from a dict keyed by column label.
pop_sample.loc[2003] = {'서울':10, '경기':20, '충청':40, '경상':21, '전라':37,
                   '제주':103, '부산':28, '수도권':30, '강원':15, 'test':0}

pop_sample


# New row from a plain list, one value per column in column order.
pop_sample.loc[2004] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]

pop_sample


# New row computed as the element-wise product of two existing rows.
pop_sample.loc[2005] = pop_sample.loc[2002] * pop_sample.loc[2004]

pop_sample


pop_sample


# del removes the column from pop_sample itself.
del pop_sample['test']

pop_sample


# drop() without inplace=True is not assigned here, so pop_sample still has
# the '강원' column afterwards (the next drop targets it again).
pop_sample.drop('강원', axis=1)


pop_sample


# inplace=True modifies pop_sample directly; columns= names the axis
# explicitly instead of axis=1.
pop_sample.drop(columns='강원', inplace=True)

pop_sample


pop_sample


# No axis given: 2004 is treated as a row label.
pop_sample.drop(2004, inplace=True)

pop_sample


# Several columns at once by passing a list of labels.
pop_sample.drop(['제주', '수도권'], axis=1, inplace=True)

pop_sample


# Several rows at once by passing a list of labels.
pop_sample.drop([2003, 2005], inplace=True)

pop_sample


# Two frames of random integers in [1, 10) with partly overlapping labels:
# op_df is 3x3 (rows a-c), nd_df is 4x5 (rows a-d, two extra columns).
op_df = pd.DataFrame(
    np.random.randint(1, 10, size=(3, 3)),
    index=list('abc'),
    columns=['서울', '경기', '인천'],
)

op_df


nd_df = pd.DataFrame(
    np.random.randint(1, 10, size=(4, 5)),
    index=list('abcd'),
    columns=['서울', '경기', '인천', '대전', '부산'],
)

nd_df


# Operator form: cells whose label exists in only one frame come out as NaN.
op_df + nd_df


# Method form with fill_value: a cell missing from one frame is treated as
# the fill value before the operation is applied.
op_df.add(nd_df, fill_value=0)


op_df - nd_df


op_df.sub(nd_df, fill_value=0)


# Restricting both operands to rows 'a' and 'c' before subtracting.
op_df.loc[['a', 'c']] - nd_df.loc[['a', 'c']]


op_df * nd_df


# 1 is the neutral fill value for multiplication and division.
op_df.mul(nd_df, fill_value=1)


op_df / nd_df


op_df.div(nd_df, fill_value=1)


op_df.floordiv(nd_df, fill_value=1)


op_df.mod(nd_df, fill_value=1)


# 3x4 frame holding 0..11, rows labelled by year and columns by letter.
my_df = pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    index=[2010, 2011, 2012],
    columns=list('abcd'),
)

my_df


# First row as a Series (its name is the row label, 2010).
my_sr = my_df.iloc[0]

my_sr

a    0
b    1
c    2
d    3
Name: 2010, dtype: int32


# DataFrame + Series: the Series index (a-d) is matched against the column
# labels, and the Series is applied to each row.
my_df + my_sr


my_df


# rename() with a scalar changes only the Series name (2010 -> 2020), not its
# index labels. NOTE(review): the sum below is therefore expected to equal
# the one above — confirm.
my_sr = my_sr.rename(2020)
print(my_sr)

my_df + my_sr

a    0
b    1
c    2
d    3
Name: 2020, dtype: int32


# 4x5 frame of float zeros with letter column labels (rebinds zr_df).
zr_df = pd.DataFrame(np.zeros((4, 5)), columns=list('abcde'))

zr_df


# Series 0..4 with the default integer labels.
no_sr = pd.Series(np.arange(5))

no_sr

0    0
1    1
2    2
3    3
4    4
dtype: int32


# Default alignment matches the Series labels (0-4) against the column labels
# (a-e). Nothing overlaps, so the result has the union of both label sets as
# columns and every cell is NaN.
zr_df.sub(no_sr)


# axis=0 matches the Series labels against the row labels instead: rows 0-3
# are reduced by 0-3, and label 4 (only in the Series) yields an all-NaN row.
zr_df.sub(no_sr, axis=0)


# Series whose labels only partly overlap my_df's columns ('a'-'d').
defi_sr = pd.Series(data=[3, 3, 3], index=['a', 'c', 'e'])

defi_sr

a    3
c    3
e    3
dtype: int64


my_df


# Operator form: the Series labels ('a', 'c', 'e') are matched against the
# column labels ('a'-'d'). NOTE(review): labels found on only one side are
# expected to come out as NaN columns — confirm.
my_df + defi_sr


# Method form of the same operation.
my_df.add(defi_sr)


# fill_value is rejected when the other operand is a Series: the call below
# raises (see the recorded traceback), so it is kept only as a comment.
# my_df.add(defi_sr, fill_value=0)

# NotImplementedError: fill_value 0 not supported.

# Working equivalent: place both operands on the union of their labels,
# filling the gaps with 0, and then add.
_all_labels = my_df.columns.union(defi_sr.index)
my_df.reindex(columns=_all_labels, fill_value=0).add(
    defi_sr.reindex(_all_labels, fill_value=0)
)

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_11052\1613751262.py in <module>
----> 1 my_df.add(defi_sr, fill_value=0)

~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in f(self, other, axis, level, fill_value)
    427             # TODO: We could allow this in cases where we end up going
    428             #  through the DataFrame path
--> 429             raise NotImplementedError(f"fill_value {fill_value} not supported.")
    430 
    431         axis = self._get_axis_number(axis) if axis is not None else 1

NotImplementedError: fill_value 0 not supported.

	0	1	2	3	4
0	1	2	3.0	4.0	5.0
1	a	b	NaN	NaN	NaN
2	0.1	0.2	0.5	NaN	NaN

	0	1	2	3
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0

	0	1	2	3
0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0

	0	1
0	0.0	0.0
1	0.0	0.0
2	0.0	0.0

location	서울	경기	충청	경상	전라	제주	부산	수도권	강원	test
1998	150.0	200.0	-10.0	10.0	5.0	1.0	5.0	350.0	10.0	NaN
1999	180.0	240.0	3.0	20.0	6.0	1.0	5.0	420.0	NaN	NaN
2000	300.0	450.0	-13.0	30.0	7.0	1.0	2.0	750.0	-10.0	NaN
2001	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2002	-90.0	49.0	3.0	-77.0	-34.0	7.0	11.0	21.0	83.0	50.0
2003	10.0	20.0	40.0	21.0	37.0	103.0	28.0	30.0	15.0	0.0
2004	1.0	2.0	3.0	4.0	5.0	6.0	7.0	8.0	9.0	0.0
2005	-90.0	98.0	9.0	-308.0	-170.0	42.0	77.0	168.0	747.0	0.0

Try to 개발자 EthanJ의 성장 로그

Try to 개발자 EthanJ의 성장 로그

Pandas DataFrame 판다스 데이터프레임 본문

Pandas DataFrame 판다스 데이터프레임

Pandas DataFrame 판다스 데이터프레임¶

DataFrame¶

1. DataFrame 생성¶

2. `DataFrame` 속성¶

3. 인덱싱(indexing)¶

4. 슬라이싱 slicing¶

> 연습문제¶

5. columns, row 추가, 변경¶

5.1. columns 추가¶

5.2. row 추가¶

6. row, columns 삭제¶

6.1. columns 삭제¶

6.2. row 삭제¶

6.3. 두 개 이상의 columns, row 삭제¶

7. `DataFrame` 사이의 산술 연산 (Operation)¶

8. `DataFrame`과 `Series` 사이의 산술 연산
Operation between `DataFrame` and `Series`

'CS & DS > Numpy & Pandas' 카테고리의 다른 글

티스토리툴바

	Col1	Col2	Col3	Col4
0	0	big	2.70	True
1	3	data	-5.00	True
2	ks01	is	2.12	False
3	2	very	8.31	False
4	5	good	-1.34	True

	경기	대전	부산	서울	인천
a	15.0	NaN	NaN	5.0	10.0
b	12.0	NaN	NaN	8.0	11.0
c	11.0	NaN	NaN	10.0	13.0
d	NaN	NaN	NaN	NaN	NaN

	경기	대전	부산	서울	인천
a	-3.0	NaN	NaN	-3.0	2.0
b	2.0	NaN	NaN	-2.0	-7.0
c	-3.0	NaN	NaN	0.0	1.0
d	NaN	NaN	NaN	NaN	NaN

	경기	대전	부산	서울	인천
a	-3.0	-8.0	-5.0	-3.0	2.0
b	2.0	-2.0	-3.0	-2.0	-7.0
c	-3.0	-9.0	-3.0	0.0	1.0
d	-8.0	-5.0	-9.0	-8.0	-2.0

	경기	대전	부산	서울	인천
a	54.0	NaN	NaN	4.0	24.0
b	35.0	NaN	NaN	15.0	18.0
c	28.0	NaN	NaN	25.0	42.0
d	NaN	NaN	NaN	NaN	NaN

	경기	대전	부산	서울	인천
a	0.666667	NaN	NaN	0.25	1.500000
b	1.400000	NaN	NaN	0.60	0.222222
c	0.571429	NaN	NaN	1.00	1.166667
d	NaN	NaN	NaN	NaN	NaN

	경기	대전	부산	서울	인천
a	0.666667	0.125000	0.200000	0.250	1.500000
b	1.400000	0.500000	0.333333	0.600	0.222222
c	0.571429	0.111111	0.333333	1.000	1.166667
d	0.125000	0.200000	0.111111	0.125	0.500000

	경기	서울	인천
a	0.0	0.0	1.0
b	1.0	0.0	0.0
c	0.0	1.0	1.0
d	0.0	0.0	0.0

	경기	대전	부산	서울	인천
a	6.0	1.0	1.0	1.0	2.0
b	2.0	1.0	1.0	3.0	2.0
c	4.0	1.0	1.0	0.0	1.0
d	1.0	1.0	1.0	1.0	1.0

	a	b	c	d	e
0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0	0.0

« 2025/08 »
일	월	화	수	목	금	토
					1	2
3	4	5	6	7	8	9
10	11	12	13	14	15	16
17	18	19	20	21	22	23
24	25	26	27	28	29	30
31

	경기	서울	인천
a	0.0	0.0	1.0
b	1.0	0.0	0.0
c	0.0	1.0	1.0
d	0.0	0.0	0.0

	경기	대전	부산	서울	인천
a	6.0	1.0	1.0	1.0	2.0
b	2.0	1.0	1.0	3.0	2.0
c	4.0	1.0	1.0	0.0	1.0
d	1.0	1.0	1.0	1.0	1.0

	a	b	c	d	e
0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0	0.0

	a	b	c	d	e	0	1	2	3	4
0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	a	b	c	d	e
0	0.0	0.0	0.0	0.0	0.0
1	-1.0	-1.0	-1.0	-1.0	-1.0
2	-2.0	-2.0	-2.0	-2.0	-2.0
3	-3.0	-3.0	-3.0	-3.0	-3.0
4	NaN	NaN	NaN	NaN	NaN

Pandas Data pre-processing 판다스 데이터 전처리 (0)	2022.11.12
Pandas Data Loading 판다스 데이터 적재 (0)	2022.11.05
Pandas Series 판다스 시리즈 (0)	2022.11.03
Numpy array Manipulation 넘파이 배열 변형 (0)	2022.11.02
Numpy Operation 넘파이 연산 (0)	2022.10.30

Try to 개발자 EthanJ의 성장 로그

Pandas DataFrame 판다스 데이터프레임 본문

Pandas DataFrame 판다스 데이터프레임

Pandas DataFrame 판다스 데이터프레임¶

DataFrame¶

1. DataFrame 생성¶

2. DataFrame 속성¶

3. 인덱싱(indexing)¶

4. 슬라이싱 slicing¶

> 연습문제¶

5. columns, row 추가, 변경¶

5.1. columns 추가¶

5.2. row 추가¶

6. row, columns 삭제¶

6.1. columns 삭제¶

6.2. row 삭제¶

6.3. 두 개 이상의 columns, row 삭제¶

7. DataFrame 사이의 산술 연산 (Operation)¶

8. DataFrame과 Series 사이의 산술 연산 Operation between DataFrame and Series

'CS & DS > Numpy & Pandas' 카테고리의 다른 글

티스토리툴바

2. `DataFrame` 속성¶

7. `DataFrame` 사이의 산술 연산 (Operation)¶

8. `DataFrame`과 `Series` 사이의 산술 연산
Operation between `DataFrame` and `Series`

	경기	서울	인천
a	0.0	0.0	1.0
b	1.0	0.0	0.0
c	0.0	1.0	1.0
d	0.0	0.0	0.0

	경기	대전	부산	서울	인천
a	6.0	1.0	1.0	1.0	2.0
b	2.0	1.0	1.0	3.0	2.0
c	4.0	1.0	1.0	0.0	1.0
d	1.0	1.0	1.0	1.0	1.0

	a	b	c	d	e
0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0	0.0

	a	b	c	d	e	0	1	2	3	4
0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN