I wrote a minimal code sample in C++ that renders 10000 colored quads on screen. I am using "instancing" and update the model matrix of each quad every frame. The data of the 6 vertices is stored in an individual VBO and reused every time. The projection matrix (orthographic) is injected once at program start via a uniform. The model matrix is calculated on the CPU with the GLM library. I measured the rendering time and got an average of 52 FPS. I think that is too low, but I cannot find the mistake/bottleneck in my little sample program.
After some analysis it seems that the 3 calculations done with GLM are slow. Am I doing something wrong here? For example, if I remove the rotation calculation, I get an FPS boost of 10 FPS! Maybe you can help me find out what I can do better here and how I can optimize my sample. It is important to me that each quad is individually configurable during runtime, which is why I decided to use instancing. Moving the matrix calculations to the GPU seems to be another option, but I am confused why the CPU has problems calculating 10000 model matrices! OK, my CPU is bad (Athlon 2 Core-Duo M300, the GPU is an ATI Mobility Radeon 4100), but it should do this task in no measurable time, shouldn't it?
Here is a minimal, working, compilable example (if you have GLFW and GLM). Maybe you have time and can help me out here :)
#define glew_static #define glm_force_inline #define glm_force_sse2 #include "glew.h" #include "glfw3.h" #include "glm.hpp" #include "glm/gtc/matrix_transform.hpp" #include <conio.h> #include <cstdlib> #include <iostream> #include <ctime> gluint buildshader() { std::string strvscode = "#version 330 core\n" "in vec3 vertexposition;\n" "in mat4 modelmatrix;\n" "uniform mat4 projectionmatrix;\n" "out vec4 m_color;\n" "void main() {\n" " vec4 vecvertex = vec4(vertexposition, 1);\n" " gl_position = projectionmatrix * modelmatrix * vecvertex;\n" " m_color = gl_position;\n" "}\n"; std::string strfscode = "#version 330 core\n" "out vec4 frag_colour;\n" "in vec4 m_color;\n" "void main() {\n" " frag_colour = vec4(m_color.x, m_color.y, m_color.z, 0.5f);\n" "}\n"; gluint gluivertexshaderid = glcreateshader(gl_vertex_shader); char const * vertexsourcepointer = strvscode.c_str(); glshadersource(gluivertexshaderid, 1, &vertexsourcepointer, null); glcompileshader(gluivertexshaderid); gluint gluifragmentshaderid = glcreateshader(gl_fragment_shader); char const * fragmentsourcepointer = strfscode.c_str(); glshadersource(gluifragmentshaderid, 1, &fragmentsourcepointer, null); glcompileshader(gluifragmentshaderid); gluint gluiprogramid = glcreateprogram(); glattachshader(gluiprogramid, gluivertexshaderid); glattachshader(gluiprogramid, gluifragmentshaderid); gllinkprogram(gluiprogramid); gldeleteshader(gluivertexshaderid); gldeleteshader(gluifragmentshaderid); return gluiprogramid; } struct sprite { glm::vec3 position, dimension; float speed, rotation, rx, ry; }; struct vertex { float x, y, z; vertex(){}; vertex(float x, float y, float z) : x(x), y(y), z(z) {} }; int main(int arc, char **argv) { // glfw init int displayreswith = 1366; //modify here int displayresheight = 768; //modify here glfwinit(); glfwwindowhint(glfw_context_version_major, 3); glfwwindowhint(glfw_context_version_minor, 3); glfwwindowhint(glfw_opengl_forward_compat, 1); glfwwindowhint(glfw_opengl_profile, 
glfw_opengl_core_profile); glfwwindowhint(glfw_red_bits, 8); glfwwindowhint(glfw_green_bits, 8); glfwwindowhint(glfw_blue_bits, 8); glfwwindowhint(glfw_alpha_bits, 8); glfwwindowhint(glfw_depth_bits, 32); glfwwindowhint(glfw_stencil_bits, 32); glfwwindow* window = glfwcreatewindow(displayreswith, displayresheight,"instancing", glfwgetprimarymonitor(),null); int width, height; glfwmakecontextcurrent(window); glfwswapinterval(0); glfwgetframebuffersize(window, &width, &height); //glew init glewexperimental = gl_true; glewinit(); const glubyte* renderer = glgetstring(gl_renderer); const glubyte* version = glgetstring(gl_version); std::cout << "renderer: " << renderer << std::endl; std::cout << "opengl supported version: " << version << std::endl; //opengl init glenable(gl_cull_face); glcullface(gl_back); glenable(gl_depth_test); gldepthfunc(gl_less); glenable(gl_blend); glblendfunc(gl_src_alpha, gl_one_minus_src_alpha); glclearcolor(255.0f, 255.0f, 255.0f, 255.0f); //shader gluint programid = buildshader(); //vbo vertexbuffer gluint vertexbuffer; glgenbuffers(1, &vertexbuffer); glbindbuffer(gl_array_buffer, vertexbuffer); vertex vertexbufferdata[6]; vertexbufferdata[0] = vertex(-0.5f, 0.5f, 0.0f); //links oben vertexbufferdata[1] = vertex(-0.5f, -0.5f, 0.0f); //links unten vertexbufferdata[2] = vertex(0.5f, -0.5f, 0.0f); //rechts unten vertexbufferdata[3] = vertexbufferdata[2]; //rechts unten vertexbufferdata[4] = vertex(0.5f, 0.5f, 0.0f); //rechts oben vertexbufferdata[5] = vertexbufferdata[0]; //links oben glbufferdata(gl_array_buffer, sizeof(vertex)*6, vertexbufferdata, gl_static_draw); //vbo instancebuffer gluint instancebuffer; glgenbuffers(1, &instancebuffer); glbindbuffer(gl_array_buffer, instancebuffer); int imaxinstancecount = 30000; glm::mat4 *ptrinstancebufferdata = new glm::mat4[imaxinstancecount]; glbufferdata(gl_array_buffer, imaxinstancecount * sizeof(glm::mat4), null, gl_stream_draw); //vao - start gluint vertexarrayobject; glgenvertexarrays(1, 
&vertexarrayobject); glbindvertexarray(vertexarrayobject); //for vbo vertexbuffer glenablevertexattribarray(glgetattriblocation(programid, "vertexposition")); glbindbuffer(gl_array_buffer, vertexbuffer); glvertexattribpointer( glgetattriblocation(programid, "vertexposition"), 3, gl_float, gl_false, sizeof(vertex), (void*)0 ); glvertexattribdivisor(0, 0); //for vbo instancebuffer int pos = glgetattriblocation(programid, "modelmatrix"); int pos1 = pos + 0; int pos2 = pos + 1; int pos3 = pos + 2; int pos4 = pos + 3; glenablevertexattribarray(pos1); glenablevertexattribarray(pos2); glenablevertexattribarray(pos3); glenablevertexattribarray(pos4); glbindbuffer(gl_array_buffer, instancebuffer); glvertexattribpointer(pos1, 4, gl_float, gl_false, sizeof(glfloat) * 4 * 4, (void*)(0)); glvertexattribpointer(pos2, 4, gl_float, gl_false, sizeof(glfloat) * 4 * 4, (void*)(sizeof(float) * 4)); glvertexattribpointer(pos3, 4, gl_float, gl_false, sizeof(glfloat) * 4 * 4, (void*)(sizeof(float) * 8)); glvertexattribpointer(pos4, 4, gl_float, gl_false, sizeof(glfloat) * 4 * 4, (void*)(sizeof(float) * 12)); glvertexattribdivisor(pos1, 1); glvertexattribdivisor(pos2, 1); glvertexattribdivisor(pos3, 1); glvertexattribdivisor(pos4, 1); glbindvertexarray(0); //vao - end //matrix vars glm::mat4 projection, rotating, scaling, translation, identity; glm::vec3 zrotatevec(0.0f, 0.0f, 1.0f); //calc projection-matrix , put shader (uniform) projection = glm::ortho(0.0f, (float)width, 0.0f, (float)height, 0.0f, 1.0f); gluseprogram(programid); gluniformmatrix4fv(glgetuniformlocation(programid, "projectionmatrix"), 1, gl_false, &projection[0][0]); //creating sprites std::srand(static_cast<unsigned int>(std::time(0))); int iactinstancecount = 10000; sprite *ptrsprites = new sprite[iactinstancecount]; (int = 0; < iactinstancecount; ++i) { ptrsprites[i].dimension = glm::vec3(16, 16, 1.0f); ptrsprites[i].position = glm::vec3(std::rand()%(width-32),std::rand()%(height-32),-1.0f 
*((std::rand()%256)/256.0f)); ptrsprites[i].rotation = rand() % 360 + 0.0f; ptrsprites[i].rx = static_cast<float>(std::rand() % 2); ptrsprites[i].ry = static_cast<float>(std::rand() % 2); ptrsprites[i].speed = (std::rand() % 100) + 1.0f; if (ptrsprites[i].speed < 1.0f) ptrsprites[i].speed = 1.0f; } //fps init double fframesrendered = 0.0f; double fframemeasurementstart = 0.0f; double ffps = 0.0f; double fcurrenttime = 0.0f; glfwsettime(0); //main-loop (also renderloop) while (!glfwwindowshouldclose(window)) { //application-logic if (glfwgetkey(window, glfw_key_escape)== glfw_press) glfwsetwindowshouldclose(window, gl_true); const double fnewtime = glfwgettime(); double fdeltatime = fnewtime - fcurrenttime; fcurrenttime = fnewtime; (int = 0; < iactinstancecount; ++i) { float fspeed = ptrsprites[i].speed * static_cast<float>(fdeltatime); ptrsprites[i].rotation += fspeed; if (ptrsprites[i].rotation >= 360.0f) ptrsprites[i].rotation = 0.0f; if (ptrsprites[i].rx == 1) ptrsprites[i].position.x = ptrsprites[i].position.x + fspeed; if (ptrsprites[i].rx == 0) ptrsprites[i].position.x = ptrsprites[i].position.x - fspeed; if (ptrsprites[i].ry == 1) ptrsprites[i].position.y = ptrsprites[i].position.y + fspeed; if (ptrsprites[i].ry == 0) ptrsprites[i].position.y = ptrsprites[i].position.y - fspeed; if (ptrsprites[i].position.x <= 0) ptrsprites[i].rx = 1; if (ptrsprites[i].position.x + ptrsprites[i].dimension.x >= width) ptrsprites[i].rx = 0; if (ptrsprites[i].position.y <= 0) ptrsprites[i].ry = 1; if (ptrsprites[i].position.y + ptrsprites[i].dimension.y >= height) ptrsprites[i].ry = 0; //matrix-calculations (saved in local buffer) translation = glm::translate(identity, ptrsprites[i].position + glm::vec3(ptrsprites[i].dimension.x / 2.0f, ptrsprites[i].dimension.y / 2.0f, 0.0f)); scaling = glm::scale(translation, ptrsprites[i].dimension); ptrinstancebufferdata[i] = glm::rotate(scaling, ptrsprites[i].rotation, zrotatevec); } //render-call glclear(gl_color_buffer_bit | 
gl_depth_buffer_bit); gluseprogram(programid); glbindvertexarray(vertexarrayobject); glbindbuffer(gl_array_buffer, instancebuffer); glbufferdata(gl_array_buffer, imaxinstancecount * sizeof(glm::mat4), null, gl_stream_draw); // buffer orphaning glbuffersubdata(gl_array_buffer, 0, iactinstancecount * sizeof(glm::mat4), ptrinstancebufferdata); gldrawarraysinstanced(gl_triangles, 0, 6, iactinstancecount); glbindvertexarray(0); glfwswapbuffers(window); glfwpollevents(); //fps-stuff ++fframesrendered; if ((fcurrenttime*1000.0f) >= (fframemeasurementstart*1000.0f) + 1000.0f) { ffps = ((fcurrenttime*1000.0f) - (fframemeasurementstart*1000.0f)) / 1000.0f * fframesrendered; fframemeasurementstart = fcurrenttime; fframesrendered = 0; std::cout << "fps: " << ffps << std::endl; } } //termination , cleanup gldeletebuffers(1, &vertexbuffer); gldeletebuffers(1, &instancebuffer); gldeletevertexarrays(1, &vertexarrayobject); gldeleteprogram(programid); glfwdestroywindow(window); glfwterminate(); return _getch(); }
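Well, after testing it on my machine, it is definitely CPU limited, so nothing you do with OpenGL is going to make much difference. I get ~300 FPS with GCC on at least -O1, but only ~80 with -O0. My CPU is very fast (i7 2600K, 4.7 GHz), but my GPU is rather slow (GT 520). I'm on Ubuntu.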
Some quick ideas for things that might speed it up a little:
- Put the vertex positions in an array in the vertex shader and use gl_VertexID to access them
- Use GL_TRIANGLE_STRIP instead of GL_TRIANGLES
- Use radians for angles, because otherwise GLM has to convert them
None of these is likely to make much of an impact, really. Just make sure your compiler is set up right, and there probably isn't much more to do.
Comments
Post a Comment