-
Notifications
You must be signed in to change notification settings - Fork 47
Expand file tree
/
Copy pathchunksmemory.py
More file actions
50 lines (37 loc) · 1.52 KB
/
chunksmemory.py
File metadata and controls
50 lines (37 loc) · 1.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
import numpy as np
from memory_profiler import profile
# Baseline: process the whole DataFrame in a single pass, so memory_profiler
# can report the peak footprint of the all-at-once approach.
@profile
def normal_pandas_processing():
    """Build a 1,000,000 x 5 DataFrame of random floats and group-average it in one shot."""
    frame = pd.DataFrame(
        np.random.rand(1000000, 5),
        columns=[f'Column_{i+1}' for i in range(5)],
    )
    # One aggregation over the entire frame.
    grouped_means = frame.groupby('Column_1').mean()
    # Print a few rows so the result is actually consumed.
    print(grouped_means.head())
# Chunked variant: process the same-sized DataFrame piecewise, so memory_profiler
# can compare its footprint against normal_pandas_processing().
@profile
def chunk_pandas_processing():
    """Create a 1,000,000 x 5 random DataFrame and group-average it chunk by chunk.

    Splits the frame into 100,000-row chunks, aggregates each chunk
    independently, then combines the per-chunk results. Prints the head of
    the final aggregate; returns None.
    """
    # Set the size of the dataset
    rows = 1000000  # 1 million rows
    cols = 5
    # Create a DataFrame with random numbers
    data = np.random.rand(rows, cols)
    df = pd.DataFrame(data, columns=[f'Column_{i+1}' for i in range(cols)])
    # Process the dataset in chunks
    chunk_size = 100000  # 100,000 rows per chunk
    # Collect per-chunk results in a list and concatenate ONCE at the end:
    # calling pd.concat inside the loop re-copies the accumulated frame on
    # every iteration, which is quadratic in time and inflates the peak
    # memory this demo is trying to measure.
    chunk_results = []
    for chunk in np.array_split(df, range(chunk_size, rows, chunk_size)):
        # Perform operations on each chunk
        chunk_results.append(chunk.groupby('Column_1').mean())
    result_chunk = pd.concat(chunk_results)
    # NOTE(review): taking the mean of per-chunk means is only exact because
    # the grouping key is a continuous random float, so every group is a
    # single row and never spans a chunk boundary. For repeated keys a
    # weighted combine (per-chunk sum and count, then divide) would be
    # required — confirm if this pattern is reused with categorical keys.
    result_chunk_final = result_chunk.groupby('Column_1').mean()
    # Display the final result (just to do something with it)
    print(result_chunk_final.head())
# Run both demos only when executed as a script, so importing this module
# (e.g. from a test or another profiler harness) does not trigger two
# million-row computations. `python -m memory_profiler chunksmemory.py`
# still produces the per-line memory reports for each @profile call.
if __name__ == "__main__":
    normal_pandas_processing()
    chunk_pandas_processing()