-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathnbdev_separate.py
More file actions
103 lines (90 loc) · 3.61 KB
/
nbdev_separate.py
File metadata and controls
103 lines (90 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# %%
import os
import nbformat
import re
from nbformat.notebooknode import NotebookNode, from_dict
# %%
# notebook_path = "notebooks/coding_projects/P1_ANOVA/anova copy.ipynb"
# with open(notebook_path, 'r', encoding='utf-8') as f:
# notebook = nbformat.read(f, as_version=4)
# notebook
# # with open(notebook_path, 'w', encoding='utf-8') as f:
# # nbformat.write(notebook, f)
# type(notebook['cells'][0])
# notebook['cells'][0]
# NotebookNode?
# %%
# Matches the first line of a top-level (unindented) import statement.
# Indented imports live inside a ``try``/``def``/``if`` body and must stay
# where they are, otherwise both resulting cells become syntactically invalid.
_IMPORT_START_RE = re.compile(r"^(?:import|from)\b")


def _partition_import_lines(lines):
    """Split ``lines`` into ``(import_lines, other_lines)``, keeping relative order.

    A top-level ``import``/``from`` line starts an import statement; lines that
    continue it (inside an open parenthesis or after a trailing backslash) are
    kept with it so multi-line imports are never torn apart.

    NOTE: this is a line-based heuristic, not a parser. A line inside a
    multi-line string that happens to start with ``import``/``from`` at
    column 0 is still classified as an import.
    """
    import_lines = []
    other_lines = []
    open_parens = 0  # unclosed "(" count of the import statement in progress
    backslash = False  # previous import line ended with a line continuation
    for line in lines:
        continuing = open_parens > 0 or backslash
        if not continuing and not _IMPORT_START_RE.match(line):
            other_lines.append(line)
            continue
        import_lines.append(line)
        # Import statements contain no string literals, so everything after
        # "#" on such a line is a comment and can be ignored for bracket counting.
        code = line.split("#", 1)[0].rstrip()
        open_parens = max(open_parens + code.count("(") - code.count(")"), 0)
        backslash = code.endswith("\\")
    return import_lines, other_lines


def split_import_and_code_cells(notebook_path):
    """
    Split notebook code cells that mix import statements with other code.

    Each code cell containing both top-level import statements and real code
    is replaced by two cells: the first holds only the imports, the second
    holds the remaining lines. Leading blank lines and ``#|`` directive lines
    of the original cell are copied to the top of both new cells. The second
    cell keeps the original cell's outputs, metadata and execution count; the
    import cell starts with no outputs and no execution count.

    Cells are left untouched when they are not code cells, contain no
    top-level imports, or contain nothing besides imports, blank lines and
    comments. The notebook file is rewritten in place, and only when at least
    one cell was actually split.

    Args:
        notebook_path: Path of the ``.ipynb`` file to process in place.
    """
    with open(notebook_path, "r", encoding="utf-8") as f:
        notebook = nbformat.read(f, as_version=4)

    new_cells = []
    changed = False
    for cell in notebook["cells"]:
        if cell["cell_type"] != "code":
            # Retain non-code cells as is
            new_cells.append(cell)
            continue

        lines = cell["source"].splitlines()

        # Peel off leading blank lines and "#|" directive lines; they are
        # duplicated onto both halves of a split cell.
        n_leading = 0
        while n_leading < len(lines) and (
            lines[n_leading].strip() == "" or lines[n_leading].startswith("#|")
        ):
            n_leading += 1
        leading_lines = lines[:n_leading]
        body_lines = lines[n_leading:]

        import_lines, other_lines = _partition_import_lines(body_lines)

        # Only split when something executable remains besides the imports;
        # blank lines and comments alone would produce a cell with no code.
        has_real_code = any(
            text and not text.startswith("#")
            for text in (line.strip() for line in other_lines)
        )
        if not (import_lines and has_real_code):
            new_cells.append(cell)
            continue

        changed = True
        # Import-only cell. "execution_count" is a required field of code
        # cells in the v4 notebook schema, so it must be present (as None).
        new_cells.append(
            from_dict(
                {
                    "cell_type": "code",
                    "metadata": {},
                    "source": "\n".join(leading_lines + import_lines),
                    "outputs": [],
                    "execution_count": None,
                }
            )
        )
        # Remaining code: this is the continuation of the original cell, so
        # it inherits the original outputs, metadata and execution count.
        new_cells.append(
            from_dict(
                {
                    "cell_type": "code",
                    "metadata": cell.get("metadata", {}),
                    "source": "\n".join(leading_lines + other_lines),
                    "outputs": cell.get("outputs", []),
                    "execution_count": cell.get("execution_count"),
                }
            )
        )

    if not changed:
        # Nothing was split: leave the file on disk untouched.
        return

    # Update the notebook with the modified cells and save it
    notebook["cells"] = new_cells
    with open(notebook_path, "w", encoding="utf-8") as f:
        nbformat.write(notebook, f)
def process_notebooks_in_folder(folder_path):
    """
    Traverse all .ipynb files in a folder and apply the cell-splitting logic.

    The folder is walked recursively. Directories named ``.ipynb_checkpoints``
    are skipped, so the autosave copies Jupyter keeps there are not rewritten.

    Args:
        folder_path: Root directory to search for ``.ipynb`` files.
    """
    for root, dirs, files in os.walk(folder_path):
        # Prune in place: os.walk (top-down) only descends into the
        # directories still listed in ``dirs`` after this assignment.
        dirs[:] = [name for name in dirs if name != ".ipynb_checkpoints"]
        for file in files:
            if file.endswith(".ipynb"):
                notebook_path = os.path.join(root, file)
                print(f"Processing {notebook_path}")
                split_import_and_code_cells(notebook_path)
if __name__ == "__main__":
    # Root directory that is scanned recursively for notebooks. Point this at
    # a narrower subtree (e.g. "notebooks/coding_projects/P1_ANOVA") to limit
    # the run to a single project.
    folder_path = "notebooks"
    if not os.path.isdir(folder_path):
        print("Invalid folder path.")
    else:
        process_notebooks_in_folder(folder_path)
        print("Processing complete.")