Not able to get scaled performance on increasing number of threads
Dmitry Olshansky
dmitry.olsh at gmail.com
Fri Feb 1 11:33:04 PST 2013
01-Feb-2013 20:08, Sparsh Mittal пишет:
> Here is the code:
>
Here is my iteration on it, with a bit of help from std.parallelism.
std.parallelism uses a thread pool, so it's somewhat faster than creating
threads anew.
Still, it's instantaneous for me, in the range of 30-40 ms, even with a grid
size of 1024 and 5M iterations.
Have you enabled all of the optimizations? The correct switches are:
dmd -inline -O -release optimize_me.d
or
rdmd -inline -O -release optimize_me.d
to compile it and run it right away.
import std.stdio;
import std.parallelism;
import std.datetime;
import std.conv;
// Number of interior cells along each side of the square grid
// (the stored array carries one extra boundary cell on every side).
immutable int gridSize = 1024;
// Maximum number of iterations.
immutable int MAXSTEPS = 5_000_000;
// Numerical tolerance: iteration stops once the measured maximum
// per-cell change drops below this value.
immutable double TOL_VAL = 0.00001;
// Weight given to the neighbour average when a cell is updated.
immutable double omega = 0.376;
// Weight given to the cell's previous value; derived from omega so the two
// weights can never drift apart when omega is tuned.
immutable double one_minus_omega = 1.0 - omega;
// Number of tasks each half-sweep is split into.
immutable int numberOfThreads = 2;
/// Returns `a` when it is strictly greater than `b`, otherwise `b`.
double MAX_FUNC(double a, double b)
{
    if (a > b)
        return a;
    return b;
}
/// Returns the magnitude of `a`: `a` itself when it is strictly positive,
/// otherwise its negation.
double ABS_VAL(double a)
{
    if (a > 0)
        return a;
    return -a;
}
// The solution grid, shared by all worker tasks. It is (gridSize+2) cells on
// each side: main() fills row 0 with 1.0 and every other cell with 0.0, and
// SolverSlave() only ever rewrites the interior cells 1..gridSize, so the
// outermost ring acts as a fixed boundary.
shared double[gridSize+2][gridSize+2] gridInfo;
// Largest per-cell change seen during a checked iteration. main() resets it
// to 0.0 on every 400th iteration and SolverSlave() raises it when its
// shouldCheckHere flag is set.
shared double maxError = 0.0;
/**
 * Entry point: initialises the grid and relaxes it until the measured
 * maximum per-cell change falls below TOL_VAL or MAXSTEPS iterations have
 * been performed.
 *
 * Each iteration is two half-sweeps in checkerboard order (cells whose
 * (i + j) parity is 0, then cells whose parity is 1). Every half-sweep is
 * split across numberOfThreads tasks on the std.parallelism task pool and
 * fully joined before the next one starts.
 *
 * Params:
 *   args = command-line arguments (unused)
 */
void main(string[] args)
{
    // The error is only measured on every checkInterval-th iteration to keep
    // the cost of updating the shared maxError low.
    enum int checkInterval = 400;

    // Row 0 is held at 1.0; every other cell starts at 0.0.
    for (int i = 0; i < gridSize + 2; i++)
    {
        for (int j = 0; j < gridSize + 2; j++)
            gridInfo[i][j] = (i == 0) ? 1.0 : 0.0;
    }

    alias MyTask = typeof(task!(SolverSlave)(0, 0, false));

    // Runs one half-sweep: submits one task per slice of rows and waits for
    // all of them. The tasks live on this stack frame, so they must all be
    // joined before returning.
    void runPhase(int remainder, bool check)
    {
        MyTask[numberOfThreads] tasks;
        foreach (cc; 0 .. numberOfThreads)
        {
            tasks[cc] = task!(SolverSlave)(cc, remainder, check);
            taskPool.put(tasks[cc]);
        }
        foreach (cc; 0 .. numberOfThreads)
            tasks[cc].yieldForce();
    }

    bool isConverged = false;
    for (int iter = 1; iter <= MAXSTEPS; iter++)
    {
        immutable bool shouldCheck = (iter % checkInterval == 0);
        if (shouldCheck)
            maxError = 0.0;

        runPhase(0, shouldCheck); // Phase 1
        runPhase(1, shouldCheck); // Phase 2

        // Only trust maxError on iterations where it was actually measured.
        // Testing it unconditionally would see the initial 0.0 and declare
        // convergence on the very first iteration.
        if (shouldCheck && maxError < TOL_VAL)
        {
            isConverged = true;
            break;
        }
    }
    /*if(isConverged)
        writeln("It converged");
    else
        writeln("It did not converge");*/
}
/**
 * Worker for one half-sweep over a horizontal slice of the grid.
 *
 * Updates every interior cell (i, j) of the slice whose (i + j) % 2 equals
 * `remainder`, replacing it with a blend of its old value (weight
 * one_minus_omega) and the average of its four neighbours (weight omega).
 *
 * Params:
 *   myNumber        = index of this worker, 0 .. numberOfThreads-1; selects
 *                     which contiguous band of rows it owns
 *   remainder       = parity (0 or 1) of the cells updated in this phase
 *   shouldCheckHere = when true, the largest change applied by this worker
 *                     is folded into the shared maxError
 */
void SolverSlave(int myNumber, int remainder, bool shouldCheckHere)
{
    // Divide the interior rows 1..gridSize among the workers.
    immutable int iStart = ((myNumber * gridSize) / numberOfThreads) + 1;
    immutable int iEnd = ((myNumber + 1) * gridSize) / numberOfThreads;

    // Track the error locally and publish it once at the end: a bare
    // read-modify-write of the shared maxError from several tasks can lose
    // the larger value and lead to a false convergence report.
    double localMaxError = 0.0;

    for (int i = iStart; i <= iEnd; i++)
    {
        for (int j = 1; j < gridSize + 1; j++)
        {
            if ((i + j) % 2 != remainder) // cell belongs to the other phase
                continue;

            double sum = (gridInfo[i    ][j + 1] + gridInfo[i + 1][j    ] +
                          gridInfo[i - 1][j    ] + gridInfo[i    ][j - 1]) * 0.25;

            // Should not check every time, to reduce overhead.
            if (shouldCheckHere)
            {
                localMaxError = MAX_FUNC(ABS_VAL(omega * (sum - gridInfo[i][j])),
                                         localMaxError);
            }

            gridInfo[i][j] = one_minus_omega * gridInfo[i][j] + omega * sum;
        }
    }

    if (shouldCheckHere)
    {
        // One short critical section per worker per checked phase.
        synchronized
        {
            if (localMaxError > maxError)
                maxError = localMaxError;
        }
    }
}
--
Dmitry Olshansky
More information about the Digitalmars-d-learn
mailing list