Hello. I've started using VTune and read about cache-coherent data. Now I'm trying to understand how VTune works by writing some simple programs that violate the rules of cache-coherent data.
For example:
// Returns a pseudo-random index in [0, limit); limit must be positive.
// Two rand() results are combined because RAND_MAX may be as small as 32767
// (as it is with MSVC), in which case a single rand() % limit could never
// reach elements beyond the first 32768.
static int random_index(int limit)
{
    const unsigned wide = ((unsigned)rand() << 15) ^ (unsigned)rand();
    return (int)(wide % (unsigned)limit);
}

// Cache-miss experiment: allocates five large arrays of `object` and performs
// `numbers` rounds of random reads. Each round picks one field at random and
// reads a random element of that field from a random object in every array.
//
// Note: the arrays are value-initialized, so all of their pages are really
// touched; expect roughly 1.3 GB of memory use (a 64-bit build is advisable).
int _tmain(int argc, wchar_t* argv[])
{
    enum { ARRAY_COUNT = 5, MATRIX_LEN = 16, POSITION_LEN = 3, UV_LEN = 2 };
    const int numbers = 500000;

    struct object {
        float matrix[MATRIX_LEN];
        float matrix2[MATRIX_LEN];
        float matrix3[MATRIX_LEN];
        float matrix4[MATRIX_LEN];
        float matrix5[MATRIX_LEN];
        float matrix6[MATRIX_LEN];
        float matrix7[MATRIX_LEN];
        float matrix8[MATRIX_LEN];
        float position[POSITION_LEN];
        float uv[UV_LEN];
    };

    // The trailing () value-initializes (zeroes) every element, so the reads
    // below never observe indeterminate values.
    object *vars[ARRAY_COUNT];
    for (int a = 0; a < ARRAY_COUNT; a++) {
        vars[a] = new object[numbers]();
    }

    // volatile: every assignment is an observable store, so the optimizer
    // cannot discard the loads that feed it.
    volatile float c = 0.0f;

    for (int i = 0; i < numbers; i++) {
        // One field is chosen per round and used for all five arrays.
        const int which = rand() % 10;

        for (int a = 0; a < ARRAY_COUNT; a++) {
            const object &o = vars[a][random_index(numbers)];
            const float *field;
            int length;

            switch (which) {
            case 0:  field = o.position; length = POSITION_LEN; break;
            case 1:  field = o.matrix;   length = MATRIX_LEN;   break;
            case 2:  field = o.matrix2;  length = MATRIX_LEN;   break;
            case 3:  field = o.matrix3;  length = MATRIX_LEN;   break;
            case 4:  field = o.matrix4;  length = MATRIX_LEN;   break;
            case 5:  field = o.matrix5;  length = MATRIX_LEN;   break;
            case 6:  field = o.matrix6;  length = MATRIX_LEN;   break;
            case 7:  field = o.matrix7;  length = MATRIX_LEN;   break;
            case 8:  field = o.matrix8;  length = MATRIX_LEN;   break;
            case 9:
            default: field = o.uv;       length = UV_LEN;       break;
            }

            // The index is bounded by the chosen field's own length, so the
            // read always stays inside that array.
            c = field[rand() % length];
        }
    }

    for (int a = 0; a < ARRAY_COUNT; a++) {
        delete[] vars[a];
    }

    system("PAUSE");
    return 0;
}
I started an analysis, and the cache results seem weird, or I don't understand them.
I've posted an image where L1 and L2 are 0 and L3 is 0.017, yet the program seems to do big and random enough data processing.
Where should I start looking, especially if I need to optimize another, more complex program?
Thanks
[attachment=30585:vtune.jpg]