Select Git revision
sort_struct.cpp
Felix Weiglhofer authored
sort_struct.cpp 2.58 KiB
// Include auto-generated header, where the kernel is declared.
#include "SortKernel.h"
#include "KeyValuePair.h"
// Include host functions to control the GPU.
#include <xpu/host.h>
// STL includes
#include <iostream>
#include <random>
#include <vector>
int main() {
// Number of elements to sort.
constexpr size_t NumElemsPerBlock = 1000;
constexpr size_t NumBlocks = 200;
constexpr size_t NumElems = NumBlocks * NumElemsPerBlock;
// Initialize the xpu runtime and select cpu backend.
xpu::initialize();
// Host buffer.
std::vector<KeyValuePair> itemsH;
// Fill host buffer with random numbers.
std::mt19937 gen{42};
std::uniform_real_distribution<float> dist{0, 1000000};
for (size_t i = 0; i < NumElems; i++) {
itemsH.push_back(KeyValuePair{dist(gen), dist(gen)});
}
// Allocate memory on gpu for sorting.
// Due to the merge step, sorting is not guaranteed to happen in-place.
// Thus three buffers are required:
// - inputD: contains the input data.
// - bufD: additional buffer that is used by the sorting algorithm,
// must have the same size as inputD.
// - outD: buffer that contains a single pointer which points to the sorted data.
// (This will be either inputD or bufD.)
KeyValuePair *inputD = xpu::device_malloc<KeyValuePair>(NumElems);
KeyValuePair *bufD = xpu::device_malloc<KeyValuePair>(NumElems);
KeyValuePair **outD = xpu::device_malloc<KeyValuePair *>(1);
// Copy data from host to GPU.
xpu::copy(inputD, itemsH.data(), NumElems);
// Run kernel that performs the sorting.
xpu::run_kernel<GpuSort>(xpu::grid::n_blocks(NumBlocks), inputD, bufD, outD, NumElems);
// Get the buffer that contains the sorted data.
KeyValuePair *outH = nullptr;
xpu::copy(&outH, outD, 1);
// Copy sorted data back to host.
xpu::copy(itemsH.data(), outH, NumElems);
// my_memcpy(itemsH.data(), outH, NumElems);
// Check if data is sorted.
bool ok = true;
for (size_t block = 0; block < NumBlocks; block++) {
size_t offset = block * NumElemsPerBlock;
for (size_t i = 1; i < NumElemsPerBlock; i++) {
auto faa = (itemsH[offset+i-1].key <= itemsH[offset+i].key);
ok &= faa;
}
}
if (ok) { std::cout << "Data is sorted!" << std::endl;
} else {
std::cout << "Error: Data is not sorted!" << std::endl;
}
std::cout << "Cleaning up." << std::endl;
// Cleanup: Free data allocated on GPU.
xpu::free(inputD);
xpu::free(bufD);
xpu::free(outD);
return 0;
}