/*
* matrix-transpose.cilk
*
* Copyright (c) 2008 Cilk Arts, Inc. All rights reserved.
*
* A comparison of dense matrix multiplication
algorithms with and without
* using Cilk parallelization.
*/
#include
<cilk.h>
#include
"../include/cilk_time.h"
#include
<iostream>
#define
DEFAULT_MATRIX_SIZE 100
// cilk_for
granularity.
#define
CILK_FOR_GRAINSIZE 128
// Transpose a
double precsion square n x n matrix, A, in place
void
matrix_transpose (double * A, int n)
{
// #pragma cilk_grainsize allows expressions,
such as n/4
//#pragma
cilk_grainsize=CILK_FOR_GRAINSIZE
cilk_for (unsigned int i=1; i<n; ++i)
{ // This is the only Cilk++ keyword
used in this program
for (unsigned int j=0; j<i; ++j) {
// Swap A[i][j] and A[j][i]. The matrix is
treated as one-dimensional.
double temp = A[j*n+i];
A[j*n+i] = A[i*n+j];
A[i*n+j] = temp;
}
}
return;
}
int cilk_main
(int argc, char ** argv) {
// Create random input matrices. Override
the default size with argv[1]
int nn = DEFAULT_MATRIX_SIZE;
if (argc > 1) {
nn = std::atoi(argv[1]);
}
std::cout << "We'll transpose an
" << nn << " by " << nn << "
matrix." << std::endl;
double *A = (double*)calloc(nn * nn,
sizeof(double));
if (NULL == A) {
std::cout << "Fatal Error.
Cannot allocate matrix A." << std::endl;
return 1;
}
// Populate A pseudo-randomly - The seed is
not changed, so tests are repeatable.
// Seed the random number generator. Remove
this for repeatable results.
srand(cilk_get_time());
for (int i = 0; i < nn; ++i)
for (int k = 0; k < nn; ++k)
A[i*nn+k] = (double)((rand() %
10000) - 5000) / 1000.0;
// Retain the original for validation.
double *A0 = (double*)calloc(nn * nn,
sizeof(double));
if (NULL == A0) {
std::cout << "Fatal Error.
Cannot allocate a copy of matrix A." << std::endl;
return 1;
}
memcpy (A0, A, nn*nn*sizeof(double));
// Transpose A
long start = cilk_get_time();
matrix_transpose (A, nn);
long end = cilk_get_time();
long par_time = (end - start);
std::cout << " First Transpose Took " << par_time/1000
<< "." << par_time%1000 << " seconds."
<< std::endl;
// Validate by tranposing again and
comparing to the original
start = cilk_get_time();
matrix_transpose (A, nn);
end = cilk_get_time();
par_time = (end - start);
std::cout << " Second Transpose
Took " << par_time/1000 << "." <<
par_time%1000 << " seconds." << std::endl;
for (int i = 0; i < nn; ++i) {
for (int k = 0; k < nn; ++k) {
if (A[i*nn+k] != A0[i*nn+k]) {
std::cout <<
"Transpose failed at row " << i << ", column "
<< k <<
". Values are "
<< A[i*nn+k] << " and " << A0[i*nn+k] <<
std::endl;
return 2;
}
}
}
std::cout << "Transpose was
performed correctly." << std::endl;
free(A);
free(A0);
return 0;
}
#if
!defined(WIN32)
// Boiler plate
code required for GCC.
struct args_s {
int argc;
char **argv;
};
int cilk_main0(void *args_)
{
args_s *args = static_cast<args_s
*>(args_);
return cilk_main(args->argc,
args->argv);
}
int main(int
argc, char *argv[])
{
args_s args;
args.argc = argc;
args.argv = argv;
cilk::context *ctx =
cilk::create_context();
int retval = ctx->run(cilk_main0, (void
*)&args);
cilk::destroy_context(ctx);
return retval;
}
#endif