initial commit
This commit is contained in:
commit
0bee7c5ead
7 changed files with 859 additions and 0 deletions
136
ex0.1/spectral-norm/spectral-norm.cc
Normal file
136
ex0.1/spectral-norm/spectral-norm.cc
Normal file
|
@ -0,0 +1,136 @@
|
|||
// The Computer Language Benchmarks Game
|
||||
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
|
||||
//
|
||||
// Original C contributed by Sebastien Loisel
|
||||
// Conversion to C++ by Jon Harrop
|
||||
// OpenMP parallelize by The Anh Tran
|
||||
// Add SSE by The Anh Tran
|
||||
// Additional SSE optimization by Krzysztof Jakubowski
|
||||
|
||||
// g++ -pipe -O3 -march=native -fopenmp -mfpmath=sse -msse2 \
|
||||
// ./spec.c++ -o ./spec.run
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <sched.h>
|
||||
#include <omp.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
template <bool modei> int Index(int i, int j) {
|
||||
return (((i + j) * (i + j + 1)) >> 1) + (modei? i : j) + 1;
|
||||
}
|
||||
|
||||
template <bool modei>
|
||||
void EvalPart(double *__restrict__ src, double *__restrict__ dst,
|
||||
int begin, int end, int length) {
|
||||
int i = begin;
|
||||
|
||||
for(; i + 1 < end; i += 2) {
|
||||
__m128d sum = _mm_set_pd(
|
||||
src[0] / double(Index<modei>(i + 1, 0)),
|
||||
src[0] / double(Index<modei>(i + 0, 0)));
|
||||
|
||||
__m128d ti = modei?
|
||||
_mm_set_pd(i + 1, i + 0) :
|
||||
_mm_set_pd(i + 2, i + 1);
|
||||
__m128d last = _mm_set_pd(
|
||||
Index<modei>(i + 1, 0),
|
||||
Index<modei>(i + 0, 0));
|
||||
|
||||
for(int j = 1; j < length; j++) {
|
||||
__m128d idx = last + ti + _mm_set1_pd(j);
|
||||
last = idx;
|
||||
sum = sum + _mm_set1_pd(src[j]) / idx;
|
||||
}
|
||||
|
||||
_mm_storeu_pd(dst + i + 0, sum);
|
||||
}
|
||||
for(; i < end; i++) {
|
||||
double sum = 0;
|
||||
for (int j = 0; j < length; j++)
|
||||
sum += src[j] / double(Index<modei>(i, j));
|
||||
dst[i] = sum;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void EvalATimesU(double *src, double *dst, int begin, int end, int N) {
|
||||
EvalPart<1>(src, dst, begin, end, N);
|
||||
}
|
||||
|
||||
void EvalAtTimesU(double *src, double *dst, int begin, int end, int N) {
|
||||
EvalPart<0>(src, dst, begin, end, N);
|
||||
}
|
||||
|
||||
void EvalAtATimesU(double *src, double *dst, double *tmp,
|
||||
int begin, int end, int N) {
|
||||
EvalATimesU (src, tmp, begin, end, N);
|
||||
#pragma omp barrier
|
||||
EvalAtTimesU(tmp, dst, begin, end, N);
|
||||
#pragma omp barrier
|
||||
}
|
||||
|
||||
int GetThreadCount() {
|
||||
cpu_set_t cs;
|
||||
CPU_ZERO(&cs);
|
||||
sched_getaffinity(0, sizeof(cs), &cs);
|
||||
|
||||
int count = 0;
|
||||
for (int i = 0; i < CPU_SETSIZE; ++i)
|
||||
if (CPU_ISSET(i, &cs))
|
||||
++count;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
double spectral_game(int N) {
|
||||
__attribute__((aligned(16))) double u[N];
|
||||
__attribute__((aligned(16))) double v[N], tmp[N];
|
||||
|
||||
double vBv = 0.0;
|
||||
double vv = 0.0;
|
||||
|
||||
#pragma omp parallel default(shared) num_threads(GetThreadCount())
|
||||
{
|
||||
// this block will be executed by NUM_THREADS
|
||||
// variable declared in this block is private for each thread
|
||||
int threadid = omp_get_thread_num();
|
||||
int threadcount = omp_get_num_threads();
|
||||
int chunk = N / threadcount;
|
||||
|
||||
// calculate each thread's working range [r1 .. r2) => static schedule
|
||||
int begin = threadid * chunk;
|
||||
int end = (threadid < (threadcount -1)) ? (begin + chunk) : N;
|
||||
|
||||
for(int i = begin; i < end; i++)
|
||||
u[i] = 1.0;
|
||||
#pragma omp barrier
|
||||
|
||||
for (int ite = 0; ite < 10; ++ite) {
|
||||
EvalAtATimesU(u, v, tmp, begin, end, N);
|
||||
EvalAtATimesU(v, u, tmp, begin, end, N);
|
||||
}
|
||||
|
||||
double sumvb = 0.0, sumvv = 0.0;
|
||||
for (int i = begin; i < end; i++) {
|
||||
sumvv += v[i] * v[i];
|
||||
sumvb += u[i] * v[i];
|
||||
}
|
||||
|
||||
#pragma omp critical
|
||||
{
|
||||
vBv += sumvb;
|
||||
vv += sumvv;
|
||||
}
|
||||
}
|
||||
|
||||
return sqrt(vBv / vv);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int N = ((argc >= 2) ? atoi(argv[1]) : 2000);
|
||||
printf("%.9f\n", spectral_game(N));
|
||||
return 0;
|
||||
}
|
||||
|
64
ex0.1/spectral-norm/spectral-norm.py
Normal file
64
ex0.1/spectral-norm/spectral-norm.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
# The Computer Language Benchmarks Game
|
||||
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
|
||||
#
|
||||
# Contributed by Sebastien Loisel
|
||||
# Fixed by Isaac Gouy
|
||||
# Sped up by Josh Goldfoot
|
||||
# Dirtily sped up by Simon Descarpentries
|
||||
# Used list comprehension by Vadim Zelenin
|
||||
# 2to3
|
||||
|
||||
from math import sqrt
|
||||
from sys import argv
|
||||
|
||||
|
||||
def eval_A(i, j):
|
||||
ij = i+j
|
||||
return 1.0 / (ij * (ij + 1) / 2 + i + 1)
|
||||
|
||||
|
||||
def eval_A_times_u(u):
|
||||
local_eval_A = eval_A
|
||||
|
||||
return [ sum([ local_eval_A(i, j) * u_j
|
||||
for j, u_j in enumerate(u)
|
||||
]
|
||||
)
|
||||
for i in range(len(u))
|
||||
]
|
||||
|
||||
|
||||
def eval_At_times_u(u):
|
||||
local_eval_A = eval_A
|
||||
|
||||
return [ sum([ local_eval_A(j, i) * u_j
|
||||
for j, u_j in enumerate(u)
|
||||
]
|
||||
)
|
||||
for i in range(len(u))
|
||||
]
|
||||
|
||||
|
||||
def eval_AtA_times_u(u):
|
||||
return eval_At_times_u(eval_A_times_u(u))
|
||||
|
||||
|
||||
def main():
|
||||
n = int(argv[1])
|
||||
u = [1] * n
|
||||
local_eval_AtA_times_u = eval_AtA_times_u
|
||||
|
||||
for dummy in range(10):
|
||||
v = local_eval_AtA_times_u(u)
|
||||
u = local_eval_AtA_times_u(v)
|
||||
|
||||
vBv = vv = 0
|
||||
|
||||
for ue, ve in zip(u, v):
|
||||
vBv += ue * ve
|
||||
vv += ve * ve
|
||||
|
||||
print("%0.9f" % (sqrt(vBv/vv)))
|
||||
|
||||
main()
|
||||
|
Reference in a new issue