-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimarray.py
More file actions
executable file
·694 lines (571 loc) · 21.9 KB
/
simarray.py
File metadata and controls
executable file
·694 lines (571 loc) · 21.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
#!/usr/bin/env python3
"""
SimArray: A script to generate simulation folders, dispatch files, and compress folders.
This script is designed for use in simulation studies where multiple parameter
combinations need to be tested. It can generate folders based on input parameters,
dispatch files into those folders, and optionally compress the results into tarballs.
Usage:
Run the script from the command line with the appropriate arguments.
Use the `--help` flag for more details.
Author: Raphaël Scherrer
URL: https://github.com/rscherrer/simarray
License: MIT License
"""
# Copyright (c) 2025 Raphaël Scherrer
#
# This script is licensed under the MIT License.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import argparse
from contextlib import ExitStack
import os
import shutil
import tarfile
import warnings
import sys
# Version string of the script; reported by the `--version` command-line flag
SCRIPT_VERSION = "1.3.3"
# Function to parse command-line arguments
def parse_arguments(argv=None):
    """
    Build the SimArray command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to the process command line, so
            existing callers that pass nothing are unaffected. Passing a
            list makes the parser usable from tests or other scripts.

    Returns:
        The argparse.Namespace holding one attribute per option declared
        below.
    """
    parser = argparse.ArgumentParser(
        description=(
            "SimArray: A script to generate simulation folders, dispatch "
            "files, and compress folders."
        )
    )

    # Input files (positional and/or taken from a folder)
    parser.add_argument(
        'filenames', nargs='*',
        help="List of filenames to process"
    )
    parser.add_argument(
        '--folder',
        help="Path to a folder containing files to process"
    )

    # Naming and layout of the generated folders
    parser.add_argument(
        '--separator', default='_',
        help="Separator to use in folder names (default: '_')"
    )
    parser.add_argument(
        '--target', default='.',
        help="Target folder to save results (default: current directory)"
    )
    parser.add_argument(
        '--by', type=int,
        help="Number of folders per batch (optional)"
    )
    parser.add_argument(
        '--batch-prefix', default='batch_',
        help="Prefix for batch folders (default: 'batch_')"
    )
    parser.add_argument(
        '--sim-prefix', default='sim',
        help="Prefix for simulation folder names (default: 'sim')"
    )
    parser.add_argument(
        '--replicates', type=int, default=1,
        help="Number of replicates per parameter combination (default: 1)"
    )
    parser.add_argument(
        '--replicate-prefix', default='r',
        help="Prefix for replicate identifiers (default: 'r')"
    )

    # Parameter file written into each folder
    parser.add_argument(
        '--template',
        help="Path to the template parameter file (optional)"
    )
    parser.add_argument(
        '--output-param-file', default=None,
        help=(
            "Name of the parameter file in the final folder (default: same "
            "as template or 'parameters.txt')"
        )
    )
    parser.add_argument(
        '--param-separator', default=' ',
        help=(
            "Separator between parameter name and value in the template file "
            "(default: ' ')"
        )
    )

    # Dispatching, compression and verbosity
    parser.add_argument(
        '--dispatch', nargs='*',
        help="List of files to copy into each simulation folder (optional)"
    )
    parser.add_argument(
        '--compress', action='store_true',
        help=(
            "Compress each batch into a tarball (or compress everything if "
            "no batches are specified)"
        )
    )
    parser.add_argument(
        '--tarball-name', default='all_simulations',
        help="Name of the global tarball (default: 'all_simulations')"
    )
    parser.add_argument(
        '--verbose', type=int, choices=[0, 1, 2], default=1,
        help="Verbosity level: 0 (silent), 1 (default), 2 (detailed)"
    )

    # Modes that skip folder generation
    parser.add_argument(
        '--dispatch-only', action='store_true',
        help="Only dispatch files into existing folders (skip folder generation)"
    )
    parser.add_argument(
        '--dispatch-recursive', action='store_true',
        help="Recursively look for simulation folders in batch folders (default: False)"
    )
    parser.add_argument(
        '--compress-only', action='store_true',
        help="Only compress existing folders (skip folder generation)"
    )
    parser.add_argument(
        '--version', action='version', version=f"SimArray {SCRIPT_VERSION}",
        help="Show program's version number and exit"
    )
    parser.add_argument(
        '--compress-all', action='store_true',
        help="Compress all simulation folders into a single archive (default: False)"
    )

    # argv=None makes argparse read the process command line
    return parser.parse_args(argv)
# Function to dispatch files into the specified folders
def dispatch_files(args, target_folders):
    """
    Copy every file listed in ``args.dispatch`` into each target folder.

    All files are validated before anything is copied, so a missing file
    can no longer leave the folders partially populated.

    Args:
        args: Parsed arguments; ``dispatch``, ``verbose`` and ``target``
            are read.
        target_folders: Sized collection of folder paths that receive the
            copies.

    Raises:
        ValueError: If an entry of ``args.dispatch`` is not an existing file.
    """
    # Treat a missing --dispatch (None) like an empty list
    files_to_copy = list(args.dispatch or [])

    # Fail fast, before any folder has been touched
    for file_to_copy in files_to_copy:
        if not os.path.isfile(file_to_copy):
            raise ValueError(
                f"File '{file_to_copy}' specified in --dispatch does "
                f"not exist or is not a file."
            )

    # Verbose if needed
    if args.verbose >= 1:
        print(
            f"Dispatching files into {len(target_folders)} folders in target '{args.target}'..."
        )

    # For each target folder...
    for folder in target_folders:

        # Verbose if needed
        if args.verbose == 2:
            print(f"Dispatching files into folder: {folder}")

        # Copy the files into the folder
        for file_to_copy in files_to_copy:
            shutil.copy(file_to_copy, folder)

    # Verbose if needed
    if args.verbose >= 1:
        print("File dispatch completed.")
# Function to handle the dispatch-only mode
def dispatch_only_mode(args):
    """
    Run the dispatch-only mode.

    Existing simulation folders (names starting with ``args.sim_prefix``)
    are looked up either directly in the target directory or, when
    ``args.dispatch_recursive`` is set, inside the batch folders (names
    starting with ``args.batch_prefix``) of the target directory. The files
    in ``args.dispatch`` are then dispatched into them.

    Returns:
        0 in all non-error cases.

    Raises:
        ValueError: If no files to dispatch were specified.
    """
    # Nothing to do without files to dispatch
    if not args.dispatch:
        raise ValueError(
            "No files specified for dispatch. Use the --dispatch argument "
            "to specify files."
        )

    def subfolders_with_prefix(parent, prefix):
        """List the sub-directories of parent whose name starts with prefix."""
        matches = []
        for entry in os.listdir(parent):
            candidate = os.path.join(parent, entry)
            if os.path.isdir(candidate) and entry.startswith(prefix):
                matches.append(candidate)
        return matches

    # Fall back on the working directory if no target is given
    target_dir = args.target or os.getcwd()

    if args.dispatch_recursive:
        # Simulation folders live one level down, inside batch folders
        batch_folders = subfolders_with_prefix(target_dir, args.batch_prefix)
        existing_folders = []
        for batch_folder in batch_folders:
            existing_folders += subfolders_with_prefix(
                batch_folder, args.sim_prefix
            )
    else:
        # Simulation folders sit directly in the target directory
        existing_folders = subfolders_with_prefix(target_dir, args.sim_prefix)

    if existing_folders:
        # Dispatch files into the found simulation folders
        dispatch_files(args, existing_folders)
    else:
        # Only warn when there is nowhere to dispatch to
        warnings.warn(
            f"No simulation folders found in '{target_dir}' starting with "
            f"prefix '{args.sim_prefix}'."
        )

    return 0
# Function to handle the compress-only mode
def compress_only_mode(args):
    """
    Run the compress-only mode.

    With ``args.compress_all``, every simulation folder found directly in
    the target directory is added to one tarball named after
    ``args.tarball_name``. Otherwise each batch folder found in the target
    directory gets its own ``<batch folder>.tar.gz``. A warning is issued
    when no matching folder exists.
    """
    # Fall back on the working directory if no target is given
    target_dir = args.target or os.getcwd()

    # Pick the kind of folder to look for, then list the matches
    wanted_prefix = args.sim_prefix if args.compress_all else args.batch_prefix
    candidates = []
    for entry in os.listdir(target_dir):
        path = os.path.join(target_dir, entry)
        if os.path.isdir(path) and entry.startswith(wanted_prefix):
            candidates.append(path)

    # Single archive holding all simulation folders
    if args.compress_all:
        if not candidates:
            warnings.warn(
                f"No simulation folders found in '{target_dir}' starting "
                f"with prefix '{args.sim_prefix}'."
            )
            return

        if args.verbose >= 1:
            print(
                f"Found {len(candidates)} simulation folders "
                "for compression."
            )

        tarball_name = os.path.join(target_dir, f"{args.tarball_name}.tar.gz")
        with tarfile.open(tarball_name, "w:gz") as tar:
            for folder in candidates:
                tar.add(folder, arcname=os.path.basename(folder))

        if args.verbose >= 1:
            print(
                f"Compressed all simulation folders into '{tarball_name}'"
            )
        return

    # One archive per batch folder
    if not candidates:
        warnings.warn(
            f"No batch folders found in '{target_dir}' starting with "
            f"prefix '{args.batch_prefix}'."
        )
        return

    if args.verbose >= 1:
        print(
            f"Found {len(candidates)} batch folders for "
            "compression."
        )

    for batch_folder in candidates:
        tarball_name = f"{batch_folder}.tar.gz"
        with tarfile.open(tarball_name, "w:gz") as tar:
            tar.add(batch_folder, arcname=os.path.basename(batch_folder))
        if args.verbose >= 1:
            print(
                f"Compressed batch folder '{batch_folder}' into "
                f"'{tarball_name}'"
            )
# Function to determine the name of the output parameter file
def get_output_param_file_name(args):
    """
    Pick the name of the parameter file written into each simulation folder.

    Precedence: the explicit ``args.output_param_file``, then the base name
    of ``args.template``, then the default 'parameters.txt'.
    """
    # An explicit name always wins
    if args.output_param_file:
        return args.output_param_file

    # Otherwise reuse the template's file name, or fall back on the default
    return os.path.basename(args.template) if args.template else "parameters.txt"
# Function to get the filenames from the specified folder
def get_filenames_from_folder(args):
    """
    Resolve ``args.filenames`` relative to ``args.folder``.

    If file names were given, the folder path is prepended to each of them
    (order preserved). Otherwise every regular file found in the folder is
    used, in sorted order: os.listdir returns entries in arbitrary order,
    and the order of the input files determines the order of the parameters
    in the generated folder names, so it has to be reproducible.

    The result is stored back into ``args.filenames``; nothing is returned.
    """
    folder = args.folder

    # If filenames are provided, prepend the folder path to each of them
    if args.filenames:
        args.filenames = [os.path.join(folder, name) for name in args.filenames]
        return

    # Otherwise take all regular files of the folder (also covers
    # args.filenames being None rather than an empty list)
    candidates = (
        os.path.join(folder, name) for name in sorted(os.listdir(folder))
    )
    args.filenames = [path for path in candidates if os.path.isfile(path)]
# Function to check if files are provided
def check_files_provided(filenames):
    """
    Make sure at least one input file is available.

    Raises:
        ValueError: If ``filenames`` is empty.
    """
    # All good if there is something to process
    if filenames:
        return

    raise ValueError("No files provided. Use filenames or the --folder option.")
# Function to check if all files have the same number of lines
def check_line_counts(filenames):
    """
    Verify that every input file contains the same number of lines.

    Raises:
        ValueError: If at least two files differ in their line count.
    """
    # Collect the distinct line counts over all files
    distinct_counts = set()
    for path in filenames:
        with open(path, 'r', encoding='utf-8') as handle:
            n_lines = 0
            for _ in handle:
                n_lines += 1
        distinct_counts.add(n_lines)

    # More than one distinct count means the files do not line up
    if len(distinct_counts) > 1:
        raise ValueError("Files do not have the same number of lines.")
# Function to generate all folder names based on parameters
def get_all_folder_names(args, parameter_names, parameter_values):
    """
    Build the name of every simulation folder to create.

    The iterables in ``parameter_values`` are read in lockstep: the i-th
    item of each one forms the i-th parameter combination. Each combination
    yields ``args.replicates`` entries.

    Returns:
        A list of ``(folder_name, values_by_parameter)`` tuples, where the
        second element maps each parameter name to its stripped value.
    """
    sep = args.separator
    all_folders = []

    # One combination per line, read across all inputs at once
    for raw_values in zip(*parameter_values):

        # Strip surrounding whitespace (including the line break)
        values = [raw.strip() for raw in raw_values]

        # A multi-token value has its tokens joined with the folder separator
        chunks = []
        for param, value in zip(parameter_names, values):
            value_part = sep.join(value.split(args.param_separator))
            chunks.append(f"{param}{sep}{value_part}")
        base_folder_name = f"{args.sim_prefix}{sep}" + sep.join(chunks)

        # One folder per replicate, each with its own copy of the values
        for repl in range(1, args.replicates + 1):
            folder_name = f"{base_folder_name}{sep}{args.replicate_prefix}{repl}"
            all_folders.append(
                (folder_name, dict(zip(parameter_names, values)))
            )

    return all_folders
# Function to get the target path for a folder
def get_target_path(args, folder_name, batch_number=None):
    """
    Return the path of a simulation folder inside the target directory.

    When batching is enabled (``args.by`` set) and a non-zero batch number
    is given, the folder is nested in the corresponding batch folder.
    """
    components = [args.target]

    # Insert the batch folder between target and simulation folder
    if args.by and batch_number:
        components.append(f"{args.batch_prefix}{batch_number}")

    components.append(folder_name)
    return os.path.join(*components)
# Function to generate the lines for the output parameter file
def generate_param_file_lines(args, parameter_names, param_values):
    """
    Generate the lines for the output parameter file.

    Without a template, one ``name<separator>value`` line is produced per
    entry of ``param_values``. With a template, each template line that
    starts with one of ``parameter_names`` followed by the separator is
    replaced by the new value, all other lines are kept verbatim, and
    parameters absent from the template are appended at the end.

    Args:
        args: Parsed arguments; ``template`` and ``param_separator`` are read.
        parameter_names: Names of the parameters to set.
        param_values: Mapping from parameter name to its value.

    Returns:
        The list of lines to write to the parameter file.

    Raises:
        ValueError: If a parameter to set appears more than once in the
            template.
    """
    separator = args.param_separator

    # If no template is provided, create a new parameter file
    if not args.template:
        return [
            f"{param}{separator}{value}\n"
            for param, value in param_values.items()
        ]

    # Read the template file
    with open(args.template, 'r', encoding='utf-8') as template_file:
        template_lines = template_file.readlines()

    # str.startswith accepts a tuple, so all parameters are tested at once
    prefixes = tuple(param + separator for param in parameter_names)

    # Substitute the values, checking for duplicates along the way
    modified_lines = []
    found_parameters = set()
    for line in template_lines:
        line_stripped = line.strip()
        if line_stripped.startswith(prefixes):
            param_name = line_stripped.split(separator, 1)[0]
            if param_name in found_parameters:
                raise ValueError(
                    f"Duplicate parameter name '{param_name}' "
                    f"found in the template file."
                )
            found_parameters.add(param_name)
            modified_lines.append(
                f"{param_name}{separator}{param_values[param_name]}\n"
            )
        else:
            modified_lines.append(line)

    # Add missing parameters
    missing = [param for param in parameter_names if param not in found_parameters]
    if missing:
        # A template whose last line lacks a line break would otherwise get
        # the first appended parameter glued onto that line
        if modified_lines and not modified_lines[-1].endswith("\n"):
            modified_lines[-1] += "\n"
        modified_lines.extend(
            f"{param}{separator}{param_values[param]}\n" for param in missing
        )

    return modified_lines
# Function to write the parameter file
def write_param_file(
    args, target_path, parameter_names, param_values,
    param_file_name
):
    """
    Write the parameter file of one simulation folder.

    The content comes from generate_param_file_lines and is written to
    ``param_file_name`` inside ``target_path``, replacing any existing file.
    """
    # Report the folder at the highest verbosity level
    if args.verbose == 2:
        print(f"Created folder: {target_path}")

    # Build the content first, then write it in one go
    content = generate_param_file_lines(args, parameter_names, param_values)
    destination = os.path.join(target_path, param_file_name)
    with open(destination, 'w', encoding='utf-8') as output_file:
        output_file.writelines(content)
# Function to handle the final compression step
def final_compression(args, batch_number=None):
    """
    Handle the final compression step.

    With batching (``args.by`` set and a non-zero ``batch_number``), each
    batch folder numbered 1 to ``batch_number`` is compressed into its own
    ``<batch folder>.tar.gz``. Otherwise the whole target directory is
    added to a single tarball named after ``args.tarball_name`` and stored
    in the target directory.

    Args:
        args: Parsed arguments; ``verbose``, ``by``, ``target``,
            ``batch_prefix`` and ``tarball_name`` are read.
        batch_number: Number of batch folders that were created, if any.
    """
    # Verbose if needed
    if args.verbose >= 1:
        print("Compressing folders...")

    # If batching, compress each batch folder separately
    if args.by and batch_number:
        for batch_num in range(1, batch_number + 1):
            batch_folder = os.path.join(args.target, f"{args.batch_prefix}{batch_num}")
            tarball_name = f"{batch_folder}.tar.gz"
            with tarfile.open(tarball_name, "w:gz") as tar:
                tar.add(batch_folder, arcname=os.path.basename(batch_folder))
            if args.verbose >= 1:
                print(f"Compressed batch folder '{batch_folder}' into '{tarball_name}'")
        return

    # Otherwise compress everything into a single tarball
    tarball_name = os.path.join(args.target, f"{args.tarball_name}.tar.gz")

    # Normalise first: the base name of a path ending with a separator
    # (e.g. "results/") is empty, which would leave the archive without a
    # proper root folder name
    root_name = os.path.basename(os.path.normpath(args.target))
    with tarfile.open(tarball_name, "w:gz") as tar:
        tar.add(args.target, arcname=root_name)
    if args.verbose >= 1:
        print(f"Compressed all simulations into '{tarball_name}'")
# Main function to run the script
def main():
    """
    Entry point of the script.

    Parses the command line, then either runs one of the shortcut modes
    (dispatch-only, compress-only) or generates the simulation folders with
    their parameter files, dispatches extra files into them and compresses
    the result if requested.

    Returns:
        0, used as the process exit status.
    """
    args = parse_arguments()

    # Shortcut modes skip folder generation altogether
    if args.dispatch_only:
        dispatch_only_mode(args)
        return 0
    if args.compress_only:
        compress_only_mode(args)
        return 0

    # Name of the parameter file written into each folder
    param_file_name = get_output_param_file_name(args)

    # Gather and validate the input files
    if args.folder:
        get_filenames_from_folder(args)
    check_files_provided(args.filenames)
    check_line_counts(args.filenames)

    with ExitStack() as stack:

        # Keep all input files open while they are read in lockstep
        handles = []
        for filename in args.filenames:
            handles.append(
                stack.enter_context(open(filename, 'r', encoding='utf-8'))
            )

        # Each parameter is named after its file, extension dropped
        parameter_names = []
        for filename in args.filenames:
            stem, _ = os.path.splitext(os.path.basename(filename))
            parameter_names.append(stem)

        # Prepare folder names
        all_folders = get_all_folder_names(args, parameter_names, handles)

        # A new batch starts every `args.by` folders
        batch_number = 0
        for index, entry in enumerate(all_folders):
            folder_name, param_values = entry

            if args.by and index % args.by == 0:
                batch_number += 1

            # Create the folder (inside its batch folder when batching)
            target_path = get_target_path(args, folder_name, batch_number)
            os.makedirs(target_path, exist_ok=True)

            # Write the parameter file
            write_param_file(
                args, target_path, parameter_names, param_values,
                param_file_name
            )

            # Dispatch files into the simulation folder
            if args.dispatch:
                dispatch_files(args, [target_path])

            # Print the target path
            if args.verbose == 2:
                print(target_path)

    # Summary counters
    total_folders = len(all_folders)
    total_batches = batch_number if args.by else 0

    # Verbose if needed
    if args.verbose >= 1:
        print("Folders created.")
        if args.by:
            print(f"Total folders created: {total_folders} across {total_batches} batches.")
        else:
            print(f"Total folders created: {total_folders}.")

    # Compress if needed
    if args.compress:
        final_compression(args, batch_number)

    # Final message
    if args.verbose >= 1:
        print("All done.")

    return 0
# Standalone execution
if __name__ == "__main__":
    # Use the return value of main() as the process exit status
    raise SystemExit(main())