Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit ebb66e8

Browse files
authored
Re-implement df.reset_index based on new structure (#849)
1 parent cb33e1e commit ebb66e8

File tree

2 files changed: 72 additions and 89 deletions.

sdc/datatypes/hpat_pandas_dataframe_functions.py

Lines changed: 22 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -2804,90 +2804,43 @@ def _df_set_column_unicode_key_impl(self, key, value):
28042804
ty_checker.raise_exc(key, 'str', 'key')
28052805

28062806

2807-
def sdc_pandas_dataframe_reset_index_codegen(drop, all_params, columns):
2807+
def sdc_pandas_dataframe_reset_index_codegen(drop, all_params, columns, column_loc):
28082808
"""
28092809
Example of generated implementation:
28102810
def _df_reset_index_impl(self, level=None, drop=False, inplace=False, col_level=0, col_fill=""):
2811-
old_index = self.index
2812-
result_0 = get_dataframe_data(self, 0)
2813-
result_1 = get_dataframe_data(self, 1)
2814-
result_2 = get_dataframe_data(self, 2)
2815-
return pandas.DataFrame({"index": old_index, "A": result_0, "B": result_1, "C": result_2})
2811+
result_0 = self._data[0][0]
2812+
result_1 = self._data[0][1]
2813+
return pandas.DataFrame({"A": result_0, "B": result_1})
28162814
"""
28172815
result_name = []
28182816
all_params_str = ', '.join(all_params)
28192817
func_lines = [f'def _df_reset_index_impl({all_params_str}):']
2820-
df = all_params[0]
2821-
if not drop.literal_value:
2822-
old_index = 'old_index'
2823-
func_lines += [f' {old_index} = {df}.index']
2824-
result_name.append((old_index, 'index'))
2825-
for i, c in enumerate(columns):
2826-
result_c = f'result_{i}'
2827-
func_lines += [
2828-
f' result_{i} = get_dataframe_data({df}, {i})'
2829-
]
2830-
result_name.append((result_c, c))
2831-
data = ', '.join(f'"{column_name}": {column}' for column, column_name in result_name)
2832-
func_lines += [f' return pandas.DataFrame({{{data}}})']
2833-
func_text = '\n'.join(func_lines)
2834-
2835-
global_vars = {'pandas': pandas,
2836-
'numpy': numpy,
2837-
'get_dataframe_data': get_dataframe_data}
2838-
2839-
return func_text, global_vars
2840-
2841-
2842-
def sdc_pandas_dataframe_reset_index_impl(self, drop=False):
2843-
all_params = ['self', 'level=None', 'drop=False', 'inplace=False', 'col_level=0', 'col_fill=""']
2844-
2845-
func_text, global_vars = sdc_pandas_dataframe_reset_index_codegen(drop, all_params, self.columns)
2846-
loc_vars = {}
2847-
exec(func_text, global_vars, loc_vars)
2848-
_apply_impl = loc_vars[f'_df_reset_index_impl']
2849-
2850-
return _apply_impl
2851-
2852-
2853-
def sdc_pandas_dataframe_reset_index_default_codegen(drop, all_params, columns):
2854-
"""
2855-
Example of generated implementation:
2856-
def _df_reset_index_impl(self, level=None, drop=False, inplace=False, col_level=0, col_fill=""):
2857-
old_index = self.index
2858-
result_0 = get_dataframe_data(self, 0)
2859-
result_1 = get_dataframe_data(self, 1)
2860-
return pandas.DataFrame({"index": old_index, "A": result_0, "B": result_1})
2861-
"""
2862-
result_name = []
2863-
all_params_str = ', '.join(all_params)
2864-
func_lines = [f'def _df_reset_index_impl({all_params_str}):']
2865-
df = all_params[0]
28662818
if not drop:
28672819
old_index = 'old_index'
2868-
func_lines += [f' {old_index} = {df}.index']
2820+
func_lines += [f' {old_index} = self.index']
28692821
result_name.append((old_index, 'index'))
28702822
for i, c in enumerate(columns):
2823+
col_loc = column_loc[c]
2824+
type_id, col_id = col_loc.type_id, col_loc.col_id
28712825
result_c = f'result_{i}'
28722826
func_lines += [
2873-
f' result_{i} = get_dataframe_data({df}, {i})'
2827+
f' result_{i} = self._data[{type_id}][{col_id}]'
28742828
]
28752829
result_name.append((result_c, c))
28762830
data = ', '.join(f'"{column_name}": {column}' for column, column_name in result_name)
28772831
func_lines += [f' return pandas.DataFrame({{{data}}})']
28782832
func_text = '\n'.join(func_lines)
28792833

28802834
global_vars = {'pandas': pandas,
2881-
'numpy': numpy,
2882-
'get_dataframe_data': get_dataframe_data}
2835+
'numpy': numpy}
28832836

28842837
return func_text, global_vars
28852838

28862839

2887-
def sdc_pandas_dataframe_reset_index_impl_default(self, drop=False):
2840+
def sdc_pandas_dataframe_reset_index_impl(self, drop=False):
28882841
all_params = ['self', 'level=None', 'drop=False', 'inplace=False', 'col_level=0', 'col_fill=""']
2889-
2890-
func_text, global_vars = sdc_pandas_dataframe_reset_index_default_codegen(drop, all_params, self.columns)
2842+
func_text, global_vars = sdc_pandas_dataframe_reset_index_codegen(drop, all_params,
2843+
self.columns, self.column_loc)
28912844
loc_vars = {}
28922845
exec(func_text, global_vars, loc_vars)
28932846
_apply_impl = loc_vars[f'_df_reset_index_impl']
@@ -2946,12 +2899,9 @@ def sdc_pandas_dataframe_reset_index(self, level=None, drop=False, inplace=False
29462899
if not (level is None or isinstance(level, types.Omitted)):
29472900
raise TypingError('{} Unsupported parameter level. Given: {}'.format(func_name, level))
29482901

2949-
if not (isinstance(drop, (types.Omitted, types.Boolean)) or drop is False):
2902+
if not isinstance(drop, (types.Omitted, types.Boolean, bool)):
29502903
ty_checker.raise_exc(drop, 'bool', 'drop')
29512904

2952-
if isinstance(drop, types.Omitted):
2953-
drop = False
2954-
29552905
if not (inplace is False or isinstance(inplace, types.Omitted)):
29562906
raise TypingError('{} Unsupported parameter inplace. Given: {}'.format(func_name, inplace))
29572907

@@ -2961,10 +2911,13 @@ def sdc_pandas_dataframe_reset_index(self, level=None, drop=False, inplace=False
29612911
if not (col_fill == '' or isinstance(col_fill, types.Omitted)):
29622912
raise TypingError('{} Unsupported parameter col_fill. Given: {}'.format(func_name, col_fill))
29632913

2964-
if not isinstance(drop, types.Literal):
2965-
if isinstance(drop, bool):
2966-
return sdc_pandas_dataframe_reset_index_impl_default(self, drop=drop)
2967-
else:
2968-
raise SDCLimitation('{} only work with Boolean literals drop.'.format(func_name))
2914+
if isinstance(drop, types.Literal):
2915+
literal_drop = drop.literal_value
2916+
return sdc_pandas_dataframe_reset_index_impl(self, drop=literal_drop)
2917+
elif isinstance(drop, types.Omitted):
2918+
return sdc_pandas_dataframe_reset_index_impl(self, drop=drop.value)
2919+
elif isinstance(drop, bool):
2920+
return sdc_pandas_dataframe_reset_index_impl(self, drop=drop)
2921+
2922+
raise SDCLimitation('Method {}(). Parameter drop is only supported as a literal.'.format(func_name))
29692923

2970-
return sdc_pandas_dataframe_reset_index_impl(self, drop=drop)

sdc/tests/test_dataframe.py

Lines changed: 50 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1495,41 +1495,71 @@ def test_impl(df, drop):
14951495
with self.subTest(drop=drop):
14961496
with self.assertRaises(Exception) as raises:
14971497
hpat_func(df, drop)
1498-
msg = 'only work with Boolean literals drop'
1499-
self.assertIn(msg.format(types.bool_), str(raises.exception))
1498+
msg = 'drop is only supported as a literal'
1499+
self.assertIn(msg, str(raises.exception))
15001500

1501-
@dfRefactoringNotImplemented
1502-
def test_df_reset_index_drop_false_index_int(self):
1503-
def test_impl(df):
1504-
return df.reset_index(drop=False)
1501+
@dfRefactoringNotImplemented # required re-implementing DataFrame unboxing
1502+
def test_df_reset_index_drop_literal_index_int(self):
1503+
def gen_test_impl(drop):
1504+
def test_impl(df):
1505+
if drop == False: # noqa
1506+
return df.reset_index(drop=False)
1507+
else:
1508+
return df.reset_index(drop=True)
1509+
return test_impl
15051510

1506-
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0],
1507-
'B': np.arange(4.0)}, index=[5, 8, 4, 6])
1508-
hpat_func = self.jit(test_impl)
1511+
df = pd.DataFrame({
1512+
'A': [1.0, 2.0, np.nan, 1.0],
1513+
'B': np.arange(4.0)
1514+
}, index=[5, 8, 4, 6])
1515+
for drop in [True, False]:
1516+
with self.subTest(drop=drop):
1517+
test_impl = gen_test_impl(drop)
1518+
hpat_func = self.jit(test_impl)
1519+
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
15091520

1510-
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
1521+
@dfRefactoringNotImplemented # required re-implementing DataFrame boxing
1522+
def test_df_reset_index_drop_literal_index_int_no_unboxing(self):
1523+
def gen_test_impl(drop):
1524+
def test_impl():
1525+
df = pd.DataFrame({
1526+
'A': [1.0, 2.0, np.nan, 1.0],
1527+
'B': np.arange(4.0)
1528+
}, index=[5, 8, 4, 6])
1529+
if drop == False: # noqa
1530+
return df.reset_index(drop=False)
1531+
else:
1532+
return df.reset_index(drop=True)
1533+
return test_impl
15111534

1512-
@dfRefactoringNotImplemented
1513-
def test_df_reset_index_drop_true_index_int(self):
1535+
for drop in [True, False]:
1536+
with self.subTest(drop=drop):
1537+
test_impl = gen_test_impl(drop)
1538+
hpat_func = self.jit(test_impl)
1539+
pd.testing.assert_frame_equal(hpat_func(), test_impl())
1540+
1541+
@dfRefactoringNotImplemented # required re-implementing DataFrame unboxing
1542+
def test_df_reset_index_drop_default_index_int(self):
15141543
def test_impl(df):
1515-
return df.reset_index(drop=True)
1544+
return df.reset_index()
15161545

15171546
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0],
15181547
'B': np.arange(4.0)}, index=[5, 8, 4, 6])
15191548
hpat_func = self.jit(test_impl)
15201549

15211550
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
15221551

1523-
@dfRefactoringNotImplemented
1524-
def test_df_reset_index_drop_default_index_int(self):
1525-
def test_impl(df):
1552+
@dfRefactoringNotImplemented # required re-implementing DataFrame boxing
1553+
def test_df_reset_index_drop_default_index_int_no_unboxing(self):
1554+
def test_impl():
1555+
df = pd.DataFrame({
1556+
'A': [1.0, 2.0, np.nan, 1.0],
1557+
'B': np.arange(4.0)
1558+
}, index=[5, 8, 4, 6])
15261559
return df.reset_index()
15271560

1528-
df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0],
1529-
'B': np.arange(4.0)}, index=[5, 8, 4, 6])
15301561
hpat_func = self.jit(test_impl)
1531-
1532-
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
1562+
pd.testing.assert_frame_equal(hpat_func(), test_impl())
15331563

15341564
@skip_numba_jit
15351565
def test_df_reset_index_empty_df(self):

0 commit comments

Comments
 (0)