Introduction to Parallel Computer Architecture Counting sort
counting_sort/counting_sort.cu
/* Host-side code to perform counting sort * Author: Naga Kandasamy * Date created: 2/15/2018 * Date modified: 3/17/2018 * * Compile as follows: make clean && make */ #include <stdlib.h> #include <stdio.h> #include <time.h> #include <sys/time.h> #include <string.h> #include <math.h> #include <limits.h> #include "counting_sort.h" #include "counting_sort_kernel.cu" extern "C" int counting_sort_gold (int *, int *, int, int); int get_random_integer (int, int); void print_array (int *, int); void print_min_and_max_in_array (int *, int); void compute_on_device (int *, int *, int, int); int check_if_sorted (int *, int); int compare_results (int *, int *, int); int main (int argc, char **argv) { int *input_array, *sorted_array_reference, *sorted_array_d; // Populate the input array with random integers between [0, RANGE] printf ("Generating input array with %d elements in the range 0 to %d.\n", NUM_ELEMENTS, RANGE); input_array = (int *)malloc (NUM_ELEMENTS * sizeof(int)); if (input_array == NULL){ printf("Cannot malloc memory for the input array. \n"); exit(0); } srand (time (NULL)); for (int i = 0; i < NUM_ELEMENTS; i++) input_array[i] = get_random_integer (0, RANGE); #ifdef DEBUG_FLAG print_array (input_array, NUM_ELEMENTS); print_min_and_max_in_array (input_array, NUM_ELEMENTS); #endif /* Sort the elements in the input array using the reference implementation. * The result is placed in sorted_array_reference. */ int status; sorted_array_reference = (int *)malloc (NUM_ELEMENTS * sizeof(int)); if (sorted_array_reference == NULL){ printf("Cannot malloc memory for the output array. \n"); exit(0); } memset(sorted_array_reference, 0, NUM_ELEMENTS); status = counting_sort_gold (input_array, sorted_array_reference, NUM_ELEMENTS, RANGE); if (status == 0){ exit (0); } status = check_if_sorted (sorted_array_reference, NUM_ELEMENTS); if (status == 0){ printf ("Error sorting the input array using the reference code. \n"); exit (0); } printf("Counting sort was successful on the CPU. \n"); #ifdef DEBUG_FLAG print_array (sorted_array_reference, NUM_ELEMENTS); #endif /* Write the function to sort the elements in the array in parallel fashion. * The result should be placed in sorted_array_mt. */ sorted_array_d = (int *)malloc (NUM_ELEMENTS * sizeof(int)); if (sorted_array_d == NULL){ printf("Cannot malloc memory for the output array. \n"); exit(0); } memset(sorted_array_d, 0, NUM_ELEMENTS); compute_on_device (input_array, sorted_array_d, NUM_ELEMENTS, RANGE); /* Check the two results for correctness. */ printf("Comparing CPU and GPU results. \n"); status = compare_results (sorted_array_reference, sorted_array_d, NUM_ELEMENTS); if (status == 1) printf ("Test passed. \n"); else printf ("Test failed. \n"); exit(0); } /* Write the GPU implementation of counting sort. */ void compute_on_device (int *input_array, int *sorted_array, int num_elements, int range) { } /* Check if the array is sorted. */ int check_if_sorted (int *array, int num_elements) { int status = 1; for (int i = 1; i < num_elements; i++){ if (array[i -1] > array[i]){ status = 0; break; } } return status; } /* Check if the arrays elements are identical. */ int compare_results (int *array_1, int *array_2, int num_elements) { int status = 1; for (int i = 0; i < num_elements; i++){ if (array_1[i] != array_2[i]){ status = 0; break; } } return status; } /* Returns a random integer between the specified min and max values. */ int get_random_integer (int min, int max) { return (int) floor ((double)(min + (max - min + 1) * ((float) rand () / ((float) RAND_MAX + 1.0)))); } /* Helper function to print the given array. */ void print_array (int *this_array, int num_elements) { printf ("Array: "); for (int i = 0; i < num_elements; i++) printf ("%d ", this_array[i]); printf ("\n"); } /* Helper function to return the min and max values in the given array. */ void print_min_and_max_in_array (int *this_array, int num_elements) { int i; int current_min = INT_MAX; for (i = 0; i < num_elements; i++) if (this_array[i] < current_min) current_min = this_array[i]; int current_max = INT_MIN; for (i = 0; i < num_elements; i++) if (this_array[i] > current_max) current_max = this_array[i]; printf ("Minimum value in the array: %d. \n", current_min); printf ("Maximum value in the array: %d. \n", current_max); }
counting_sort/counting_sort.h
#ifndef _COUNTING_SORT_H_ #define _COUNTING_SORT_H_ #define RANGE 255 // The input array will have integer elements ranging from 0 to RANGE #define NUM_ELEMENTS 100000000 // Number of elements in the input array // #define DEBUG_FLAG #endif
counting_sort/counting_sort_gold.cpp
#include <stdlib.h> #include <stdio.h> #include <string.h> #include "counting_sort.h" extern "C" int counting_sort_gold (int *, int *, int, int); void print_histogram (int *, int, int); /* The reference implementation of counting sort. */ int counting_sort_gold (int *input_array, int *sorted_array, int num_elements, int range) { /* Step 1: Compute the histogram. */ int i; int num_bins = range + 1; int *bin = (int *)malloc (num_bins * sizeof(int)); // Generate a bin for each element within the range if (bin == NULL){ printf("Cannot malloc memory for the histogram. \n"); return 0; } memset(bin, 0, num_bins); for (i = 0; i < num_elements; i++) bin[input_array[i]]++; #ifdef DEBUG_FLAG print_histogram (bin, num_bins, num_elements); #endif /* Step 2: Calculate the starting indices in the output array for storing the sorted elements. * Use an inclusive scan of the bin elements. */ for (i = 1; i < num_bins; i++) bin[i] = bin[i - 1] + bin[i]; #ifdef DEBUG_FLAG print_histogram (bin, num_bins, num_elements); #endif /* Step 3: Generate the sorted array. */ int j; int start_idx = 0; for (i = 0; i < num_bins; i++){ for (j = start_idx; j < bin[i]; j++){ sorted_array[j] = i; } start_idx = bin[i]; } return 1; } /* Helper function to print the contents of the histogram. */ void print_histogram (int *bin, int num_bins, int num_elements) { int num_histogram_entries = 0; int i; for (i = 0; i < num_bins; i++){ printf ("Bin %d: %d \n", i, bin[i]); num_histogram_entries += bin[i]; } printf ("Number of elements in the input array: %d \n", num_elements); printf ("Number of histogram elements: %d \n", num_histogram_entries); }
counting_sort/counting_sort_kernel.cu
/* Write the GPU code to perform the step(s) involved in counting sort. */ __global__ void counting_sort_kernel () { }
counting_sort/Makefile
# A simple CUDA makefile # # Author: Naga Kandasamy # Date: 9/16/2015 # # CUDA depends on two things: # 1) The CUDA nvcc compiler, which needs to be on your path, # or called directly, which we do here # 2) The CUDA shared library being available at runtime, # which we make available by setting the LD_LIBRARY_PATH # variable for the duration of the makefile. # # Note that you can set your PATH and LD_LIBRARY_PATH variables as part of your # .bash_profile so that you can compile and run without using this makefile. NVCCFLAGS := -O3 -gencode arch=compute_60,code=sm_60 NVCC := /usr/local/cuda/bin/nvcc LD_LIBRARY_PATH := /usr/local/cuda/lib64 all: counting_sort counting_sort: counting_sort.cu counting_sort_gold.cpp $(NVCC) -o counting_sort counting_sort.cu counting_sort_gold.cpp $(NVCCFLAGS) clean: rm counting_sort