%% Benchmark configuration and result storage
% NOTE(review): 'clear all' removes workspace variables and also flushes
% functions/MEX files from memory -- confirm the latter is intended here.
clear all;

% Square-matrix orders to benchmark: every N from 1 to 2500.
Size = 1:2500;
% Divisor used by the benchmark loop when deriving the repetition count.
div = 10;

% Result columns, one entry per benchmarked size.
GFlops_cpu = zeros(numel(Size), 1);   % CPU throughput
GFlops_gpu = zeros(numel(Size), 1);   % GPU throughput
Mem_MB     = zeros(numel(Size), 1);   % gpu_free/1E6 as read after each size

%% Benchmark loop: single-precision A*B on CPU and GPU for every size
% For each N in Size this section
%   1. draws two random N-by-N single-precision matrices,
%   2. times RPTC CPU products (after two untimed warm-up products),
%   3. times RPTG GPU products (after two untimed warm-up products),
%   4. stores gpu_free/1E6 from gpu_entry(13) in Mem_MB.
% Throughput is reported as 2*N^3 operations per product, in GFlops.
for ii = 1:numel(Size)
    N = Size(ii);

    % Operands (A is drawn before B, as the random stream is shared).
    A = randn(N, N, 'single');
    B = randn(N, N, 'single');

    % Repetition count: grows quadratically with the distance of N from
    % max(Size) and is never smaller than 2. CPU and GPU use the same count.
    RPTG = round(1E-3 * ((max(Size) - N) / div)^2 + 2);
    RPTC = RPTG;
    fprintf('%4.0f / %4.0f', N, Size(end));

    % Operation count credited to one N-by-N product.
    nFlop = 2*N^3;

    % ---- CPU ----
    % Two untimed warm-up products before the clock starts.
    for rpt = 1:2
        Cc = A*B;
    end
    ts = tic;
    for rpt = 1:RPTC
        Cc = A*B;
    end
    tec = toc(ts) / RPTC;                 % mean seconds per product
    GFlops_cpu(ii) = nFlop / (tec*1E9);
    fprintf('   |   %6.2f (%6.1f s)', GFlops_cpu(ii), tec*RPTC);

    % ---- GPU ----
    % NOTE(review): gsingle/geval/gsync are not defined in this script;
    % presumably GPUmat (device copy, forced evaluation, device sync) --
    % confirm against the toolbox in use.
    Ag = gsingle(A);
    Bg = gsingle(B);
    % Two untimed warm-up products, each explicitly evaluated.
    for rpt = 1:2
        Cg = Ag*Bg;
        geval(Cg);
    end
    gsync;
    ts = tic;
    for rpt = 1:RPTG
        Cg = Ag*Bg;
        geval(Cg);
    end
    gsync;
    teg = toc(ts) / RPTG;                 % mean seconds per product
    GFlops_gpu(ii) = nFlop / (teg*1E9);
    fprintf('   |   %6.2f (%6.1f s)', GFlops_gpu(ii), teg*RPTG);

    % ---- GPU memory ----
    gpu_info = gpu_entry(13);
    Mem_MB(ii) = gpu_info.gpu_free / 1E6;
    % NOTE(review): 'gpu_hook' is never assigned in this script -- confirm
    % whether 'gpu_info' or a MEX function of that name was meant.
    clear gpu_hook;
    fprintf('   |   Mem free [MB]: %6.1f\n', Mem_MB(ii));
end



%% PLOT RESULTS AND SAVE FIGURE
% Plots CPU (red) and GPU (green) throughput against the matrix order,
% exports the figure as JPEG and EPS, and stores both result vectors in
% MAT-files in the current folder.

% Create figure; keep the handle so later calls target this figure
% explicitly instead of whatever happens to be current.
fig = figure(1);
clf(fig);
plot(Size, GFlops_cpu, 'r-', Size, GFlops_gpu, 'g-', 'LineWidth', 1.5);
% 'grid on' instead of bare 'grid': the bare form toggles, so the result
% would depend on the default grid state of the axes.
grid on;
xlabel('Square Matrix Size, N\timesN   [-]');
ylabel('Performance   [GFlops]');

% Save figures: JPEG (quality 99, 100 dpi) and level-2 colour EPS.
print(fig, '-djpeg99', '-r100', 'gflops_v1_01.jpg');
print(fig, '-depsc2', '-r2400', 'gflops_v1_01.eps');

% Save raw results, one variable per MAT-file.
save('gflops_cpu_01', 'GFlops_cpu');
save('gflops_gpu_01', 'GFlops_gpu');

