This is an old revision of the document!
Table of Contents
MPI4Py
TOD
PyTorch
Consider the following simple Python test script ("pytorch_test.py"):
import torch


def test_pytorch():
    """Report the PyTorch/CUDA setup and run a tiny tensor addition.

    Prints the installed PyTorch version, whether CUDA is usable, and
    (if so) the name of GPU 0; then adds two small tensors on the
    selected device to prove that tensor math works end to end.
    """
    print("PyTorch version:", torch.__version__)
    print("CUDA available:", torch.cuda.is_available())

    # Prefer the GPU when one is visible; otherwise fall back to CPU.
    if torch.cuda.is_available():
        print("CUDA device:", torch.cuda.get_device_name(0))
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Simple tensor operation
    x = torch.tensor([1.0, 2.0, 3.0], device=device)
    y = torch.tensor([4.0, 5.0, 6.0], device=device)
    z = x + y
    print("Tensor operation result:", z)


test_pytorch()
To test it on the unite cluster you can use the following sbatch script to run it:
#!/bin/bash
#SBATCH --job-name=pytorch_test
#SBATCH --output=pytorch_test.out
#SBATCH --error=pytorch_test.err
#SBATCH --time=00:10:00
#SBATCH --partition=a40
#SBATCH --gres=gpu:1
#SBATCH --mem=4G
#SBATCH --cpus-per-task=2

# Load necessary modules (modify based on your system)
module load python/pytorch-2.5.1-llvm-cuda-12.3-python-3.13.1-llvm

# Activate your virtual environment if needed
# source ~/your_env/bin/activate

# Run the PyTorch script
python3.13 pytorch_test.py
Pandas
Consider the following simple Python test script ("pandas_test.py"):
import pandas as pd
import numpy as np

# Create a simple DataFrame
data = {
    'A': [1, 2, 3, 4],
    'B': [5, 6, 7, 8],
    'C': [9, 10, 11, 12],
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Test basic operations
print("\nSum of each column:")
print(df.sum())
print("\nMean of each column:")
print(df.mean())

# Adding a new column
df['D'] = df['A'] + df['B']
print("\nDataFrame after adding new column D (A + B):")
print(df)

# Filtering rows
filtered_df = df[df['A'] > 2]
print("\nFiltered DataFrame (A > 2):")
print(filtered_df)

# Check if NaN values exist
print("\nCheck for NaN values:")
print(df.isna().sum())
You can use the following sbatch script to run it:
#!/bin/bash
# Runs the pandas_test.py example.
# Fixes to the original: the job name, output and error file names were
# copied from the PyTorch example (pytorch_test), and the job requested a
# GPU (--gres=gpu:1) although pandas is CPU-only.
#SBATCH --job-name=pandas_test
#SBATCH --output=pandas_test.out
#SBATCH --error=pandas_test.err
#SBATCH --time=00:10:00
#SBATCH --partition=a40
#SBATCH --mem=4G
#SBATCH --cpus-per-task=2
# NOTE(review): if the a40 partition requires a GPU allocation, either
# re-add "#SBATCH --gres=gpu:1" or switch to a CPU partition — confirm
# with the cluster documentation.

# Load necessary modules (modify based on your system)
module load python/3.13.1-llvm
module load python/3.13/pandas/2.2.3

# Activate your virtual environment if needed
# source ~/your_env/bin/activate

# Run the pandas script
python3.13 pandas_test.py
Simple C/C++ program
The following is a simple C/C++ program which performs element-wise addition of 2 vectors. It does not use any dependent libraries:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>

/*
 * Perform element-wise addition of two vectors.
 *
 * Parameters:
 *   a:      First input vector
 *   b:      Second input vector
 *   result: Output vector (a + b)
 *   size:   Number of elements in vectors
 */
void vector_addition(const double *a, const double *b, double *result, size_t size)
{
    for (size_t idx = 0; idx < size; idx++) {
        result[idx] = a[idx] + b[idx];
    }
}

int main()
{
    const size_t size = 10000000;

    printf("========================================\n");
    printf("Vector Addition Example in C\n");
    printf("========================================\n");
    printf("Vector size: %zu elements\n", size);

    /* Allocate the two input vectors and the output vector on the heap. */
    printf("\nAllocating memory...\n");
    double *vector_a = (double *)malloc(size * sizeof(double));
    double *vector_b = (double *)malloc(size * sizeof(double));
    double *result   = (double *)malloc(size * sizeof(double));
    if (vector_a == NULL || vector_b == NULL || result == NULL) {
        fprintf(stderr, "Error: Memory allocation failed!\n");
        return 1;
    }

    /* Fill both inputs with pseudo-random values in [0, 1]. */
    printf("Initializing vectors...\n");
    srand(time(NULL));
    for (size_t i = 0; i < size; i++) {
        vector_a[i] = (double)rand() / RAND_MAX;
        vector_b[i] = (double)rand() / RAND_MAX;
    }

    printf("Performing vector addition...\n");
    vector_addition(vector_a, vector_b, result, size);

    /* Show a small sample of the result as a sanity check. */
    printf("First 5 elements of result:\n");
    for (int i = 0; i < 5; i++) {
        printf("  result[%d] = %.6f\n", i, result[i]);
    }

    free(vector_a);
    free(vector_b);
    free(result);
    return 0;
}
The following is the respective batch script for compiling and running the program. You can see the output of the program in the generated vector_sum_%j.out file.
#!/bin/bash
#SBATCH --job-name=vector_sum
#SBATCH --output=vector_sum_%j.out
#SBATCH --error=vector_sum_%j.err
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --time=00:10:00
#SBATCH --partition=unite

echo "========================================="
echo "SLURM Job Information"
echo "========================================="
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "Starting at: $(date)"
echo ""

# Load necessary modules
module load gcc

# Compile the program
echo "Compiling vector_sum.c..."
gcc -O3 -march=native -o vector_sum vector_sum.c -lm
if [ $? -ne 0 ]; then
    echo "Error: Compilation failed!"
    exit 1
fi
echo "Compilation successful!"
echo ""

echo "Running vector_sum..."
./vector_sum
echo ""
echo "Job finished at: $(date)"
Simple Python program
The following is a simple Python program which performs element-wise addition of 2 vectors. It does not use any dependent libraries:
#!/usr/bin/env python3
import random
import time


def vector_addition(a, b):
    """Return the element-wise sum of two equal-length vectors.

    Parameters:
        a: First input vector (list)
        b: Second input vector (list)

    Returns:
        result: Output vector (a + b)
    """
    return [a[i] + b[i] for i in range(len(a))]


def main():
    size = 10000000

    print("=" * 40)
    print("Vector Addition Example in Python")
    print("=" * 40)
    print(f"Vector size: {size:,} elements")

    # Build two random input vectors of the requested size.
    print("\nAllocating and initializing vectors...")
    random.seed(time.time())
    vector_a = [random.random() for _ in range(size)]
    vector_b = [random.random() for _ in range(size)]

    print("Performing vector addition...")
    result = vector_addition(vector_a, vector_b)

    # Print a small sample of the result as a sanity check.
    print("\nFirst 5 elements of result:")
    for i in range(5):
        print(f"  result[{i}] = {result[i]:.6f}")


if __name__ == "__main__":
    main()
The following is the respective batch script for running the program. You can see the output of the program in the generated vector_sum_%j.out file.
#!/bin/bash
#SBATCH --job-name=vector_sum
#SBATCH --output=vector_sum_%j.out
#SBATCH --error=vector_sum_%j.err
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --time=00:10:00
#SBATCH --partition=unite

echo "========================================="
echo "SLURM Job Information"
echo "========================================="
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "Starting at: $(date)"
echo ""

echo "Python version:"
python3 --version
echo ""
echo "Python executable location:"
which python3
echo ""

echo "========================================="
echo "Running vector_sum.py"
echo "========================================="
echo ""
python3 vector_sum.py
echo ""
echo "========================================="
echo "Job finished at: $(date)"
echo "========================================="
Python program with dependencies
The following is a simple Python program which computes the sum of 2 vectors 3 times using NumPy.
#!/usr/bin/env python3
import numpy as np
import time


def vector_addition(size=10000000):
    """Add two freshly generated random vectors of the given size.

    Parameters:
        size: Number of elements in each vector (default 10,000,000).

    Returns:
        A NumPy array holding the element-wise sum of the two vectors.
    """
    print(f"Initializing vectors of size {size:,}...")
    vector_a = np.random.rand(size)
    vector_b = np.random.rand(size)

    print("Performing vector addition...")
    result = vector_a + vector_b
    return result


def main():
    print("=" * 60)
    print("Vector Addition Example using NumPy")
    print("=" * 60)

    # Run the addition for three increasingly large vector sizes.
    sizes = [1000000, 10000000, 50000000]
    for size in sizes:
        result = vector_addition(size)
        print(f"\nVector size: {size:,} elements")
        print(f"First 5 elements of result: {result[:5]}")
        print("-" * 60)


if __name__ == "__main__":
    main()
The following is the respective batch script for compiling and running the program. You can see the output of the program in the generated vector_sum_numpy_%j.out file. The script showcases 3 different ways for managing Python dependencies based on your use case. This is controlled through the PYTHON_ENV_METHOD variable defined in the script. Please read the comments in the script for the configuration of the environment which you need to do on the login node. The dependency in the current example is NumPy but the approach for dependency management is generic.
#!/bin/bash
#SBATCH --job-name=vector_sum_numpy
#SBATCH --output=vector_sum_numpy_%j.out
#SBATCH --error=vector_sum_numpy_%j.err
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --time=00:15:00
#SBATCH --partition=unite

################################################################################
# CONFIGURATION: Choose your Python environment method
################################################################################
# Options: "venv", "conda", or "module"
PYTHON_ENV_METHOD="venv"

VENV_PATH="$HOME/venvs/numpy_env"
CONDA_ENV_NAME="numpy_env"
CONDA_MODULE="anaconda3"
PYTHON_MODULE="python/3.13"
NUMPY_MODULE="python/3.13/numpy/2.2.2"

################################################################################
# Setup Instructions (run once on login node before first job submission)
################################################################################
# For venv:
#   python3 -m venv $HOME/venvs/numpy_env
#   source $HOME/venvs/numpy_env/bin/activate
#   pip install numpy
#   deactivate
#
# For conda:
#   module load anaconda3
#   conda create -n numpy_env python=3.9 numpy
#   conda deactivate
#
# For module:
#   Check available modules: module avail python
#   You need to load both python and numpy modules in the script. The numpy
#   module needs to be compatible with the python module. Then you need to
#   modify PYTHON_MODULE and NUMPY_MODULE variables above accordingly.
################################################################################

echo "========================================="
echo "SLURM Job Information"
echo "========================================="
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "Starting at: $(date)"
echo ""
echo "Python environment method: $PYTHON_ENV_METHOD"
echo ""

# Activate the chosen Python environment.
if [ "$PYTHON_ENV_METHOD" = "venv" ]; then
    echo "Activating Python virtual environment..."
    if [ -f "$VENV_PATH/bin/activate" ]; then
        source "$VENV_PATH/bin/activate"
        echo "Virtual environment activated: $VENV_PATH"
    else
        echo "ERROR: Virtual environment not found at $VENV_PATH"
        echo "Please create it first (see setup instructions in script)"
        exit 1
    fi
elif [ "$PYTHON_ENV_METHOD" = "conda" ]; then
    echo "Activating Conda environment..."
    module load "$CONDA_MODULE"
    source activate "$CONDA_ENV_NAME"
    echo "Conda environment activated: $CONDA_ENV_NAME"
elif [ "$PYTHON_ENV_METHOD" = "module" ]; then
    echo "Loading environment modules..."
    module load "$PYTHON_MODULE"
    module load "$NUMPY_MODULE"
    echo "Modules loaded: $PYTHON_MODULE, $NUMPY_MODULE"
else
    echo "ERROR: Invalid PYTHON_ENV_METHOD='$PYTHON_ENV_METHOD'"
    echo "Valid options: venv, conda, module"
    exit 1
fi

# Verify Python and NumPy
echo ""
echo "Python3 version:"
python3 --version
echo ""
echo "NumPy version:"
python3 -c "import numpy; print(f'NumPy {numpy.__version__}')"
echo ""
echo "Python executable location:"
which python3
echo ""

echo "========================================="
echo "Running vector_sum_numpy.py"
echo "========================================="
echo ""
python3 vector_sum_numpy.py

# Deactivate / clean up whichever environment was activated above.
echo ""
echo "Cleaning up environment..."
if [ "$PYTHON_ENV_METHOD" = "venv" ]; then
    deactivate
    echo "Virtual environment deactivated"
elif [ "$PYTHON_ENV_METHOD" = "conda" ]; then
    conda deactivate
    echo "Conda environment deactivated"
elif [ "$PYTHON_ENV_METHOD" = "module" ]; then
    # Modules are automatically unloaded when job ends
    echo "Modules will be unloaded when job completes"
fi

echo ""
echo "========================================="
echo "Job finished at: $(date)"
echo "========================================="
C/C++ program with dependencies
The following is a simple C/C++ program which compresses and decompresses a string using zLib.
#include <stdio.h> #include <string.h> #include <zlib.h> #include <stdlib.h> #define CHUNK 16384 int main() { const char *original = "Hello, this is a test string for zlib compression! " "We'll compress this text and then decompress it to verify it works."; printf("Original string: %s\n", original); printf("Original length: %lu bytes\n\n", strlen(original)); // Compression uLong source_len = strlen(original) + 1; uLong compressed_len = compressBound(source_len); unsigned char *compressed = (unsigned char *)malloc(compressed_len); if (compress(compressed, &compressed_len, (unsigned char *)original, source_len) != Z_OK) { fprintf(stderr, "Compression failed!\n"); free(compressed); return 1; } printf("Compressed length: %lu bytes\n", compressed_len); printf("Compression ratio: %.2f%%\n\n", 100.0 * (1.0 - (double)compressed_len / source_len)); // Decompression uLong decompressed_len = source_len; unsigned char *decompressed = (unsigned char *)malloc(decompressed_len); if (uncompress(decompressed, &decompressed_len, compressed, compressed_len) != Z_OK) { fprintf(stderr, "Decompression failed!\n"); free(compressed); free(decompressed); return 1; } printf("Decompressed string: %s\n", decompressed); printf("Decompressed length: %lu bytes\n\n", decompressed_len); if (strcmp(original, (char *)decompressed) == 0) { printf("SUCCESS: Original and decompressed strings match!\n"); } else { printf("ERROR: Strings don't match!\n"); } free(compressed); free(decompressed); return 0; }
The following is the respective batch script for compiling and running the program. You can see the output of the program in the generated zlib_compress_%j.out file. C/C++ dependencies are generally compiled from source. If this is the case you can use the Simple C/C++ program example. The batch script showcases loading the dependent library on the system and linking against it. The cluster uses modules for managing the installed dependencies. Make sure to use compatible compiler and library.
#!/bin/bash
#SBATCH --job-name=zlib_compress
#SBATCH --output=zlib_compress_%j.out
#SBATCH --error=zlib_compress_%j.err
#SBATCH --time=00:05:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=unite

# Use a compiler and zlib build that are compatible with each other.
module load gcc/latest
module load zlib/1/latest-gcc

echo "Loaded modules:"
module list
echo ""

echo "Compiling zlib_compress.c..."
gcc -o zlib_compress zlib_compress.c -lz

if [ $? -eq 0 ]; then
    echo "Compilation successful!"
    echo ""
    echo "Running the program:"
    echo "===================="
    ./zlib_compress
else
    echo "Compilation failed!"
    exit 1
fi
MPI
The following is an example C/C++ application which uses MPI to perform element-wise addition of two vectors. Each MPI task computes the addition of its local region and then sends it back to the leader. Using MPI with Python is similar assuming that you know how to manage Python dependencies on the cluster which is described in the previous section. What is important here is to understand how to manage the resources of the system.
#include <stdio.h> #include <stdlib.h> #include <mpi.h> #define VECTOR_SIZE 100000 int main(int argc, char** argv) { int rank, size; int i; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); int local_size = VECTOR_SIZE / size; int *local_a = (int*)malloc(local_size * sizeof(int)); int *local_b = (int*)malloc(local_size * sizeof(int)); int *local_c = (int*)malloc(local_size * sizeof(int)); int *a = NULL; int *b = NULL; int *c = NULL; if (rank == 0) { a = (int*)malloc(VECTOR_SIZE * sizeof(int)); b = (int*)malloc(VECTOR_SIZE * sizeof(int)); c = (int*)malloc(VECTOR_SIZE * sizeof(int)); for (i = 0; i < VECTOR_SIZE; i++) { a[i] = i + 1; } for (i = 0; i < VECTOR_SIZE; i++) { b[i] = (i + 1) * 2; } } MPI_Scatter(a, local_size, MPI_INT, local_a, local_size, MPI_INT, 0, MPI_COMM_WORLD); MPI_Scatter(b, local_size, MPI_INT, local_b, local_size, MPI_INT, 0, MPI_COMM_WORLD); printf("Process %d: Adding %d elements\n", rank, local_size); for (i = 0; i < local_size; i++) { local_c[i] = local_a[i] + local_b[i]; } MPI_Gather(local_c, local_size, MPI_INT, c, local_size, MPI_INT, 0, MPI_COMM_WORLD); if (rank == 0) { printf("\nFirst 5 elements of (A + B): "); for (i = 0; i < 5; i++) { printf("%d ", c[i]); } printf("\n"); free(a); free(b); free(c); } free(local_a); free(local_b); free(local_c); MPI_Finalize(); return 0; }
The following is the respective batch script for compiling and running the program. You can see the output of the program in the generated vector_sum_mpi_%j.out file. The ntasks parameter of the batch script specifies the number of MPI tasks to be started. This is how you can leverage the resources of the system in order to increase the work done in parallel by your application. The MPI tasks are not guaranteed to be executed on different nodes in the cluster; they can also run on separate physical cores of the same node. What is guaranteed is that every task will have the resources to execute in parallel.
#!/bin/bash
#SBATCH --job-name=vector_sum_mpi
#SBATCH --output=vector_mpi_%j.out
#SBATCH --error=vector_mpi_%j.err
#SBATCH --ntasks=4
#SBATCH --time=00:05:00
#SBATCH --partition=unite

module load mpi/latest

echo "Compiling vector_sum_mpi.c..."
mpicc -o vector_sum_mpi vector_sum_mpi.c
if [ $? -ne 0 ]; then
    echo "Compilation failed!"
    exit 1
fi
echo "Compilation successful!"

# Launch one MPI process per allocated task.
echo "Running with $SLURM_NTASKS MPI processes..."
echo "----------------------------------------"
mpirun -np $SLURM_NTASKS ./vector_sum_mpi
echo "----------------------------------------"
echo "Job completed!"
