C++ AMP 矩阵分块

参考自:https://msdn.microsoft.com/en-us/library/hh873135.aspx

#include <iostream>
#include <iomanip>
#include <amp.h>
using namespace concurrency;

// Dimensions of the matrix built by TilingDescription(): ROWS x COLS elements.
const int ROWS = 8;
const int COLS = 9;

// Per-element record filled in by TilingDescription().
// tileRow and tileColumn specify the tile that each thread is in.
// globalRow and globalColumn specify the location of the thread in the array_view.
// localRow and localColumn specify the location of the thread relative to the tile.
// The field order matters: instances are created with positional aggregate
// initialization ({ value, 0, 0, 0, 0, 0, 0 }).
struct Description
{
	int value;         // Set on the CPU to the element's linear position in the matrix.
	int tileRow;       // Row of the tile (t_idx.tile[0]).
	int tileColumn;    // Column of the tile (t_idx.tile[1]).
	int globalRow;     // Row within the whole array_view (t_idx.global[0]).
	int globalColumn;  // Column within the whole array_view (t_idx.global[1]).
	int localRow;      // Row within the tile (t_idx.local[0]).
	int localColumn;   // Column within the tile (t_idx.local[1]).
};

// A helper function for formatting the output.
// Selects the console text attribute used for the following output:
//   color == 0 -> 4 (red foreground)
//   otherwise  -> 2 (green foreground)
// Callers pass (tileRow + tileColumn) % 2, which is always 0 or 1, so the
// selector has to be compared against 0. Comparing against 4 can never match
// and would print every tile in green.
void SetConsoleColor(int color)
{
	int colorValue = (color == 0) ? 4 : 2;
	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colorValue);
}

// A helper function for formatting the output.
// Resizes the console screen buffer to `width` columns by `height` rows and
// then asks for the console window to show that whole buffer.
// Both console calls are best-effort: their return values are not checked.
void SetConsoleSize(int height, int width)
{
	COORD coord;
	coord.X = static_cast<short>(width);
	coord.Y = static_cast<short>(height);
	SetConsoleScreenBufferSize(GetStdHandle(STD_OUTPUT_HANDLE), coord);

	// Kept on the stack: the rectangle is only read during the call below, so a
	// heap allocation would just leak.
	SMALL_RECT rect;
	rect.Left = 0;
	rect.Top = 0;
	// The corners of a SMALL_RECT are inclusive, so the last visible cell of a
	// width x height buffer is (width - 1, height - 1). A rectangle reaching
	// (width, height) extends past the buffer and is rejected.
	rect.Right = static_cast<short>(width - 1);
	rect.Bottom = static_cast<short>(height - 1);
	SetConsoleWindowInfo(GetStdHandle(STD_OUTPUT_HANDLE), true, &rect);
}

// This method creates an 8 x 9 matrix of Description structures. In the
// call to parallel_for_each, the structure is updated with tile,global, and local indices.
void TilingDescription()
{
	// Create 72(8x9) Description structures.
	std::vector<Description> descs;
	for (int i = 0; i < ROWS * COLS; i++)
	{
		Description d = { i, 0, 0, 0, 0, 0, 0 };
		descs.push_back(d);
	}

	// Create an array_view from the Description structures.
	extent<2> matrix(ROWS, COLS);
	array_view<Description, 2> descriptions(matrix, descs);

	// Update each Description with the tile, global. and local indices.
	parallel_for_each(descriptions.extent.tile<2, 3>(),
		[=](tiled_index<2, 3> t_idx) restrict(amp)
	{	
		descriptions[t_idx].globalRow = t_idx.global[0];
		descriptions[t_idx].globalColumn = t_idx.global[1];
		descriptions[t_idx].tileRow = t_idx.tile[0];
		descriptions[t_idx].tileColumn = t_idx.tile[1];
		descriptions[t_idx].localRow = t_idx.local[0];
		descriptions[t_idx].localColumn = t_idx.local[1];
	});

	// Print out the Description structure for each element in the matrix.
	// Tiles are displayed in red and green to distinguish them from each other.
	SetConsoleSize(100, 150);
	for (int row = 0; row < ROWS; row++)
	{
		for (int column = 0; column < COLS; column++)
		{
			SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
			std::cout << "Value: " << std::setw(2) << descriptions(row, column).value << "    ";
		}
		std::cout << "\n";

		for (int column = 0; column < COLS; column++)
		{
			SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
			std::cout << "Tile: " << "(" << descriptions(row, column).tileRow << "," << descriptions(row, column).tileColumn << ") ";
		}
		std::cout << "\n";

		for (int column = 0; column < COLS; column++)
		{
			SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
			std::cout << "Global: " << "(" << descriptions(row, column).globalRow << "," << descriptions(row, column).globalColumn << ")";
		}
		std::cout << "\n";

		for (int column = 0; column < COLS; column++)
		{
			SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
			std::cout << "Local: " << "(" << descriptions(row, column).localRow << "," << descriptions(row, column).localColumn << ")";
		}
		std::cout << "\n";
		std::cout << "\n";
	}
}

#define SAMPLESIZE 2
#define MATRIXSIZE 8
// Averages a MATRIXSIZE x MATRIXSIZE matrix holding 0, 1, 2, ... over
// SAMPLESIZE x SAMPLESIZE tiles on the accelerator and prints one average per
// tile, one output row per row of tiles.
void SamplingExample()
{
	// Input matrix: element k holds the value k.
	std::vector<float> rawData;
	for (int value = 0; value < MATRIXSIZE * MATRIXSIZE; ++value)
	{
		rawData.push_back(static_cast<float>(value));
	}
	extent<2> dataExtent(MATRIXSIZE, MATRIXSIZE);
	array_view<float, 2> matrix(dataExtent, rawData);

	// Output storage: one zero-initialised element for each tile of the input.
	int outputSize = MATRIXSIZE / SAMPLESIZE;
	std::vector<float> outputData(outputSize * outputSize, 0.0f);

	extent<2> outputExtent(MATRIXSIZE / SAMPLESIZE, MATRIXSIZE / SAMPLESIZE);
	array<float, 2> averages(outputExtent, outputData.begin(), outputData.end());

	// Use tiles that are SAMPLESIZE x SAMPLESIZE and compute the average of
	// the values in each tile.
	// The only reference-type variable you can pass into the parallel_for_each
	// call is a concurrency::array, which is why averages is captured by
	// reference while everything else is captured by value.
	parallel_for_each(matrix.extent.tile<SAMPLESIZE, SAMPLESIZE>(),
		[=, &averages](tiled_index<SAMPLESIZE, SAMPLESIZE> t_idx) restrict(amp)
		{
			const int localRow = t_idx.local[0];
			const int localCol = t_idx.local[1];

			// Every thread copies its own element into the tile-sized array.
			tile_static float tileValues[SAMPLESIZE][SAMPLESIZE];
			tileValues[localRow][localCol] = matrix[t_idx];

			// Wait until the whole tile has been loaded before averaging.
			t_idx.barrier.wait();

			// Only the first thread of each tile does the reduction. Without
			// this check every thread in the tile would repeat the calculation
			// and make the same assignment to averages.
			const bool isFirstInTile = (localRow == 0 && localCol == 0);
			if (isFirstInTile)
			{
				float &tileAverage = averages(t_idx.tile[0], t_idx.tile[1]);
				for (int r = 0; r < SAMPLESIZE; r++)
				{
					for (int c = 0; c < SAMPLESIZE; c++)
					{
						tileAverage += tileValues[r][c];
					}
				}
				tileAverage /= (float)(SAMPLESIZE * SAMPLESIZE);
			}
		}
	);

	// Print out the results.
	// You cannot access the values in averages directly. You must copy them
	// back to a CPU variable first.
	outputData = averages;
	int next = 0;
	for (int row = 0; row < outputSize; ++row)
	{
		for (int col = 0; col < outputSize; ++col, ++next)
		{
			std::cout << outputData[next] << " ";
		}
		std::cout << "\n";
	}

	// Output for SAMPLESIZE = 2 is:
	// 4.5  6.5  8.5  10.5
	// 20.5 22.5 24.5 26.5
	// 36.5 38.5 40.5 42.5
	// 52.5 54.5 56.5 58.5

	// Output for SAMPLESIZE = 4 is:
	// 13.5  17.5
	// 45.5  49.5
}

// Program entry point: runs the sampling example, then waits for a
// non-whitespace character on standard input so the console window stays open.
// Declared as int main(): standard C++ does not allow main to return void.
int main()
{
	// Uncomment to run the tiling walkthrough instead of / in addition to the
	// sampling example.
	//TilingDescription();
	SamplingExample();

	char wait;
	std::cin >> wait;
	return 0;
}

  

原文链接: https://www.cnblogs.com/WuhanLiukai/p/4545757.html

欢迎关注

微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍

    C++ AMP 矩阵分块

原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/217076

非原创文章文中已经注明原地址,如有侵权,联系删除

关注公众号【高性能架构探索】,第一时间获取最新文章

转载文章受原作者版权保护。转载请注明原作者出处!

(0)
上一篇 2023年2月13日 上午9:37
下一篇 2023年2月13日 上午9:37

相关推荐