Commit 5253e9bd authored by Pierre Kraemer's avatar Pierre Kraemer

authorize external threads to traverse a map + thread safe SphericalHarmonics

parent d19cc06a
...@@ -34,7 +34,7 @@ namespace CGoGN ...@@ -34,7 +34,7 @@ namespace CGoGN
const unsigned int EMBNULL = 0xffffffff; const unsigned int EMBNULL = 0xffffffff;
const unsigned int MRNULL = 0xffffffff; const unsigned int MRNULL = 0xffffffff;
const unsigned int NB_THREAD = 16; const unsigned int NB_THREADS = 16;
// DO NOT MODIFY (ORBIT_IN_PARENT function in Map classes) // DO NOT MODIFY (ORBIT_IN_PARENT function in Map classes)
......
...@@ -104,24 +104,42 @@ protected: ...@@ -104,24 +104,42 @@ protected:
// protected copy constructor to prevent the copy of map // protected copy constructor to prevent the copy of map
GenericMap(const GenericMap& ) {} GenericMap(const GenericMap& ) {}
/**
* @brief m_thread_ids
* vector of known thread ids, i.e. threads for which a mark vector,
* a Dart buffer or a uint buffer will be given when asked
*/
mutable std::vector<std::thread::id> m_thread_ids;
/** /**
* * @brief m_authorizeExternalThreads
* if true, getCurrentThreadIndex will give an index to an unknown thread
*/ */
std::vector<std::thread::id> m_thread_ids; bool m_authorizeExternalThreads;
public: public:
/// compute thread index in the table of thread /// compute thread index in the table of thread
inline unsigned int getCurrentThreadIndex() const; inline unsigned int getCurrentThreadIndex() const;
/// add place for n new threads in the table of thread return index of first // /// register the given thread ID for access to resources on this map
inline unsigned int addEmptyThreadIds(unsigned int n); // inline void addThreadId(const std::thread::id id);
/// unregister the given thread to access resources on this map
inline void removeThreadId(const std::thread::id id);
/// remove the n last added threads from table /// add room for a new thread ID (will be set on thread start)
inline void popThreadIds(unsigned int nb); inline std::thread::id& addEmptyThreadId();
/// get ref to jth threadId for updating (in thread) /// get a threadId based on its index
inline std::thread::id& getThreadId(unsigned int j); inline std::thread::id getThreadId(unsigned int index) const;
/**
* @brief setExternalThreadsAuthorization
* @param b
* if true, threads that did not created the map will be able to traverse it
* if false, traversal of the map by other threads will fail
*/
inline void setExternalThreadsAuthorization(bool b);
protected: protected:
/** /**
...@@ -132,14 +150,14 @@ protected: ...@@ -132,14 +150,14 @@ protected:
static std::map<std::string, RegisteredBaseAttribute*>* m_attributes_registry_map; static std::map<std::string, RegisteredBaseAttribute*>* m_attributes_registry_map;
/// buffer for less memory allocation /// buffer for less memory allocation
static std::vector< std::vector<Dart>* >* s_vdartsBuffers; mutable std::vector< std::vector<Dart>* > s_vdartsBuffers[NB_THREADS];
static std::vector< std::vector<unsigned int>* >* s_vintsBuffers; mutable std::vector< std::vector<unsigned int>* > s_vintsBuffers[NB_THREADS];
public: public:
/// table of instancied maps for Dart/CellMarker release /// table of instancied maps for Dart/CellMarker release
static std::vector<GenericMap*>* s_instances; static std::vector<GenericMap*>* s_instances;
protected:
protected:
/** /**
* Direct access to the Dart attributes that store the orbits embeddings * Direct access to the Dart attributes that store the orbits embeddings
* (only initialized when necessary, i.e. addEmbedding function) * (only initialized when necessary, i.e. addEmbedding function)
...@@ -154,7 +172,7 @@ protected: ...@@ -154,7 +172,7 @@ protected:
AttributeMultiVector<NoTypeNameAttribute<std::vector<Dart> > >* m_quickLocalIncidentTraversal[NB_ORBITS][NB_ORBITS] ; AttributeMultiVector<NoTypeNameAttribute<std::vector<Dart> > >* m_quickLocalIncidentTraversal[NB_ORBITS][NB_ORBITS] ;
AttributeMultiVector<NoTypeNameAttribute<std::vector<Dart> > >* m_quickLocalAdjacentTraversal[NB_ORBITS][NB_ORBITS] ; AttributeMultiVector<NoTypeNameAttribute<std::vector<Dart> > >* m_quickLocalAdjacentTraversal[NB_ORBITS][NB_ORBITS] ;
std::vector< AttributeMultiVector<MarkerBool>* > m_markVectors_free[NB_ORBITS][NB_THREAD] ; std::vector< AttributeMultiVector<MarkerBool>* > m_markVectors_free[NB_ORBITS][NB_THREADS] ;
std::mutex m_MarkerStorageMutex[NB_ORBITS]; std::mutex m_MarkerStorageMutex[NB_ORBITS];
unsigned int m_nextMarkerId; unsigned int m_nextMarkerId;
......
...@@ -26,43 +26,79 @@ ...@@ -26,43 +26,79 @@
namespace CGoGN namespace CGoGN
{ {
/**************************************** /****************************************
* THREAD ID MANAGEMENT * * THREAD ID MANAGEMENT *
****************************************/ ****************************************/
inline unsigned int GenericMap::getCurrentThreadIndex() const inline unsigned int GenericMap::getCurrentThreadIndex() const
{ {
std::thread::id id = std::this_thread::get_id(); std::thread::id id = std::this_thread::get_id();
unsigned int i=0; for (unsigned int i = 0; i < m_thread_ids.size(); ++i)
while (id != m_thread_ids[i]) {
if (id == m_thread_ids[i])
return i;
}
assert(m_thread_ids.size() < NB_THREADS + 1);
if (m_authorizeExternalThreads)
{
m_thread_ids.push_back(id);
return m_thread_ids.size() - 1;
}
else
{ {
i++; return -1;
assert(i<m_thread_ids.size());
} }
return i;
} }
//inline void GenericMap::addThreadId(const std::thread::id& id) //inline void GenericMap::addThreadId(const std::thread::id id)
//{ //{
// m_thread_ids.push_back(id); // for (unsigned int i = 0; i < m_thread_ids.size(); ++i)
// {
// if (m_thread_ids[i] == id)
// return;
// }
// assert(m_thread_ids.size() < NB_THREADS + 1);
// if (m_authorizeExternalThreads)
// m_thread_ids.push_back(id);
//} //}
inline unsigned int GenericMap::addEmptyThreadIds(unsigned int n) inline void GenericMap::removeThreadId(const std::thread::id id)
{
for (unsigned int i = 0; i < m_thread_ids.size(); ++i)
{
if (m_thread_ids[i] == id)
{
m_thread_ids[i] = m_thread_ids.back();
m_thread_ids.pop_back();
break;
}
}
}
inline std::thread::id& GenericMap::addEmptyThreadId()
{ {
unsigned int nb = uint32(m_thread_ids.size()); assert(m_thread_ids.size() < NB_THREADS + 1);
m_thread_ids.resize(nb + n); unsigned int size = uint32(m_thread_ids.size());
return nb; m_thread_ids.resize(size + 1);
return m_thread_ids.back();
} }
inline void GenericMap::popThreadIds(unsigned int nb) inline std::thread::id GenericMap::getThreadId(unsigned int index) const
{ {
assert(nb<m_thread_ids.size()); assert(index < m_thread_ids.size());
for ( unsigned int i=0; i<nb; ++i) return m_thread_ids[index];
m_thread_ids.pop_back();
} }
inline std::thread::id& GenericMap::getThreadId(unsigned int j) inline void GenericMap::setExternalThreadsAuthorization(bool b)
{ {
return m_thread_ids[j]; m_authorizeExternalThreads = b;
if (!m_authorizeExternalThreads)
{
// keep only the thread that created the map
while (m_thread_ids.size() > 1)
m_thread_ids.pop_back();
}
} }
...@@ -90,7 +126,7 @@ inline void GenericMap::releaseDartBuffer(std::vector<Dart>* vd) const ...@@ -90,7 +126,7 @@ inline void GenericMap::releaseDartBuffer(std::vector<Dart>* vd) const
{ {
unsigned int thread = getCurrentThreadIndex(); unsigned int thread = getCurrentThreadIndex();
if (vd->capacity()>1024) if (vd->capacity() > 1024)
{ {
std::vector<Dart> v; std::vector<Dart> v;
vd->swap(v); vd->swap(v);
...@@ -100,7 +136,6 @@ inline void GenericMap::releaseDartBuffer(std::vector<Dart>* vd) const ...@@ -100,7 +136,6 @@ inline void GenericMap::releaseDartBuffer(std::vector<Dart>* vd) const
s_vdartsBuffers[thread].push_back(vd); s_vdartsBuffers[thread].push_back(vd);
} }
inline std::vector<unsigned int>* GenericMap::askUIntBuffer() const inline std::vector<unsigned int>* GenericMap::askUIntBuffer() const
{ {
unsigned int thread = getCurrentThreadIndex(); unsigned int thread = getCurrentThreadIndex();
...@@ -121,7 +156,7 @@ inline void GenericMap::releaseUIntBuffer(std::vector<unsigned int>* vui) const ...@@ -121,7 +156,7 @@ inline void GenericMap::releaseUIntBuffer(std::vector<unsigned int>* vui) const
{ {
unsigned int thread = getCurrentThreadIndex(); unsigned int thread = getCurrentThreadIndex();
if (vui->capacity()>1024) if (vui->capacity() > 1024)
{ {
std::vector<unsigned int> v; std::vector<unsigned int> v;
vui->swap(v); vui->swap(v);
...@@ -132,7 +167,6 @@ inline void GenericMap::releaseUIntBuffer(std::vector<unsigned int>* vui) const ...@@ -132,7 +167,6 @@ inline void GenericMap::releaseUIntBuffer(std::vector<unsigned int>* vui) const
} }
/**************************************** /****************************************
* DARTS MANAGEMENT * * DARTS MANAGEMENT *
****************************************/ ****************************************/
...@@ -280,27 +314,25 @@ AttributeMultiVector<MarkerBool>* GenericMap::askMarkVector() ...@@ -280,27 +314,25 @@ AttributeMultiVector<MarkerBool>* GenericMap::askMarkVector()
{ {
std::lock_guard<std::mutex> lockMV(m_MarkerStorageMutex[ORBIT]); std::lock_guard<std::mutex> lockMV(m_MarkerStorageMutex[ORBIT]);
unsigned int x=m_nextMarkerId++; unsigned int x = m_nextMarkerId++;
std::string number("___"); std::string number("___");
number[2]= '0'+x%10; number[2] = '0'+x%10;
x = x/10; x = x/10;
number[1]= '0'+x%10; number[1] = '0'+x%10;
x = x/10; x = x/10;
number[0]= '0'+x%10; number[0] = '0'+x%10;
AttributeMultiVector<MarkerBool>* amv = m_attribs[ORBIT].addMarkerAttribute("marker_" + orbitName(ORBIT) + number); AttributeMultiVector<MarkerBool>* amv = m_attribs[ORBIT].addMarkerAttribute("marker_" + orbitName(ORBIT) + number);
return amv; return amv;
} }
} }
template <unsigned int ORBIT> template <unsigned int ORBIT>
inline void GenericMap::releaseMarkVector(AttributeMultiVector<MarkerBool>* amv) inline void GenericMap::releaseMarkVector(AttributeMultiVector<MarkerBool>* amv)
{ {
assert(isOrbitEmbedded<ORBIT>() || !"Invalid parameter: orbit not embedded") ; assert(isOrbitEmbedded<ORBIT>() || !"Invalid parameter: orbit not embedded") ;
unsigned int thread = getCurrentThreadIndex(); unsigned int thread = getCurrentThreadIndex();
m_markVectors_free[ORBIT][thread].push_back(amv); m_markVectors_free[ORBIT][thread].push_back(amv);
} }
......
...@@ -660,7 +660,8 @@ protected: ...@@ -660,7 +660,8 @@ protected:
bool& m_finished; bool& m_finished;
unsigned int m_id; unsigned int m_id;
FUNC m_lambda; FUNC m_lambda;
std::thread::id& m_threadId; // ref on thread::id in table of threads in genericMap for init at operator() std::thread::id& m_threadId; // ref on thread::id in table of threads in genericMap for init at operator()
public: public:
ThreadFunction(FUNC func, std::vector<CELL>& vd, Utils::Barrier& s1, Utils::Barrier& s2, bool& finished, unsigned int id, std::thread::id& threadId) : ThreadFunction(FUNC func, std::vector<CELL>& vd, Utils::Barrier& s1, Utils::Barrier& s2, bool& finished, unsigned int id, std::thread::id& threadId) :
m_cells(vd), m_sync1(s1), m_sync2(s2), m_finished(finished), m_id(id), m_lambda(func), m_threadId(threadId) m_cells(vd), m_sync1(s1), m_sync2(s2), m_finished(finished), m_id(id), m_lambda(func), m_threadId(threadId)
...@@ -670,6 +671,8 @@ public: ...@@ -670,6 +671,8 @@ public:
ThreadFunction(const ThreadFunction<ORBIT, FUNC>& tf): ThreadFunction(const ThreadFunction<ORBIT, FUNC>& tf):
m_cells(tf.m_cells), m_sync1(tf.m_sync1), m_sync2(tf.m_sync2), m_finished(tf.m_finished), m_id(tf.m_id), m_lambda(tf.m_lambda){} m_cells(tf.m_cells), m_sync1(tf.m_sync1), m_sync2(tf.m_sync2), m_finished(tf.m_finished), m_id(tf.m_id), m_lambda(tf.m_lambda){}
std::thread::id& getThreadId() { return m_threadId; }
void operator()() void operator()()
{ {
// first thing to do set the thread id in genericMap // first thing to do set the thread id in genericMap
...@@ -714,10 +717,12 @@ void foreach_cell_tmpl(MAP& map, FUNC func, unsigned int nbth) ...@@ -714,10 +717,12 @@ void foreach_cell_tmpl(MAP& map, FUNC func, unsigned int nbth)
ThreadFunction<ORBIT,FUNC>** tfs = new ThreadFunction<ORBIT,FUNC>*[nbth]; ThreadFunction<ORBIT,FUNC>** tfs = new ThreadFunction<ORBIT,FUNC>*[nbth];
// add place for nbth new threads in the table of threadId in genericmap // add place for nbth new threads in the table of threadId in genericmap
unsigned int firstThread = map.addEmptyThreadIds(nbth); // unsigned int firstThread = map.addEmptyThreadIds(nbth);
for (unsigned int i = 0; i < nbth; ++i) for (unsigned int i = 0; i < nbth; ++i)
{ {
tfs[i] = new ThreadFunction<ORBIT,FUNC>(func, vd[i],sync1,sync2, finished,1+i,map.getThreadId(firstThread+i)); std::thread::id& threadId = map.addEmptyThreadId();
tfs[i] = new ThreadFunction<ORBIT,FUNC>(func, vd[i], sync1, sync2, finished, 1+i, threadId);
threads[i] = new std::thread( std::ref( *(tfs[i]) ) ); threads[i] = new std::thread( std::ref( *(tfs[i]) ) );
} }
...@@ -744,21 +749,19 @@ void foreach_cell_tmpl(MAP& map, FUNC func, unsigned int nbth) ...@@ -744,21 +749,19 @@ void foreach_cell_tmpl(MAP& map, FUNC func, unsigned int nbth)
sync2.wait();// everybody refilled then go sync2.wait();// everybody refilled then go
} }
sync1.wait();// wait for all thread to finish its vector sync1.wait(); // wait for all thread to finish its vector
finished = true;// say finsih to everyone finished = true; // say finsih to everyone
sync2.wait(); // just wait for last barrier wait ! sync2.wait(); // just wait for last barrier wait !
// wait for all theads to be finished
//wait for all theads to be finished
for (unsigned int i = 0; i < nbth; ++i) for (unsigned int i = 0; i < nbth; ++i)
{ {
threads[i]->join(); threads[i]->join();
delete threads[i]; delete threads[i];
map.removeThreadId(tfs[i]->getThreadId());
delete tfs[i]; delete tfs[i];
} }
map.popThreadIds(nbth);
delete[] tfs; delete[] tfs;
delete[] threads; delete[] threads;
delete[] vd; delete[] vd;
......
...@@ -39,7 +39,7 @@ private: ...@@ -39,7 +39,7 @@ private:
std::vector<Dart>* m_vd ; std::vector<Dart>* m_vd ;
const GenericMap* m_map; const GenericMap* m_map;
TraversorDartsOfOrbit( const TraversorDartsOfOrbit<MAP,ORBIT>& /*tr*/){} TraversorDartsOfOrbit(const TraversorDartsOfOrbit<MAP,ORBIT>& /*tr*/){}
public: public:
TraversorDartsOfOrbit(const MAP& map, Cell<ORBIT> c) ; TraversorDartsOfOrbit(const MAP& map, Cell<ORBIT> c) ;
...@@ -66,7 +66,7 @@ private: ...@@ -66,7 +66,7 @@ private:
std::vector<Dart>* m_vd ; std::vector<Dart>* m_vd ;
const GenericMap* m_map; const GenericMap* m_map;
VTraversorDartsOfOrbit( const VTraversorDartsOfOrbit<MAP,ORBIT>& /*tr*/){} VTraversorDartsOfOrbit(const VTraversorDartsOfOrbit<MAP,ORBIT>& /*tr*/){}
public: public:
VTraversorDartsOfOrbit(const MAP& map, Cell<ORBIT> c) ; VTraversorDartsOfOrbit(const MAP& map, Cell<ORBIT> c) ;
......
...@@ -31,7 +31,10 @@ private : ...@@ -31,7 +31,10 @@ private :
static int nb_coefs ; // number of coefs = (resolution + 1) * (resolution + 1) static int nb_coefs ; // number of coefs = (resolution + 1) * (resolution + 1)
static unsigned long cpt_instances; // number of instances of the class static unsigned long cpt_instances; // number of instances of the class
static Tscalar K_tab [(max_resolution+1)*(max_resolution+1)]; // table containing constants K static Tscalar K_tab [(max_resolution+1)*(max_resolution+1)]; // table containing constants K
static Tscalar F_tab [(max_resolution+1)*(max_resolution+1)]; // table for computing the functions : P or Y or y
// static Tscalar F_tab [(max_resolution+1)*(max_resolution+1)]; // table for computing the functions : P or Y or y
static Tscalar F_tab [32][(max_resolution+1)*(max_resolution+1)];
Tcoef* coefs; // table of coefficients Tcoef* coefs; // table of coefficients
...@@ -48,12 +51,12 @@ public : ...@@ -48,12 +51,12 @@ public :
// evaluation // evaluation
static void set_eval_direction (Tscalar theta, Tscalar phi) ; // fix the direction in which the SH has to be evaluated static void set_eval_direction (Tscalar theta, Tscalar phi, unsigned int threadId = 0) ; // fix the direction in which the SH has to be evaluated
static void set_eval_direction (Tscalar x, Tscalar y, Tscalar z) ; // fix the direction in which the SH has to be evaluated static void set_eval_direction (Tscalar x, Tscalar y, Tscalar z, unsigned int threadId = 0) ; // fix the direction in which the SH has to be evaluated
Tcoef evaluate () const; // evaluates at a fixed direction Tcoef evaluate (unsigned int threadId = 0) const; // evaluates at a fixed direction
Tcoef evaluate_at (Tscalar theta, Tscalar phi) const; // eval spherical coordinates Tcoef evaluate_at (Tscalar theta, Tscalar phi, unsigned int threadId = 0) const; // eval spherical coordinates
Tcoef evaluate_at (Tscalar x, Tscalar y, Tscalar z) const; // eval cartesian coordinates Tcoef evaluate_at (Tscalar x, Tscalar y, Tscalar z, unsigned int threadId = 0) const; // eval cartesian coordinates
// I/O // I/O
const Tcoef& get_coef (int l, int m) const {assert ((l>=0 && l <=resolution) || !" maybe you forgot to call set_level()"); assert (m >= (-l) && m <= l); return get_coef(index(l,m));} const Tcoef& get_coef (int l, int m) const {assert ((l>=0 && l <=resolution) || !" maybe you forgot to call set_level()"); assert (m >= (-l) && m <= l); return get_coef(index(l,m));}
...@@ -77,17 +80,17 @@ public : ...@@ -77,17 +80,17 @@ public :
// fitting // fitting
template <typename Tdirection, typename Tchannel> template <typename Tdirection, typename Tchannel>
void fit_to_data(int n, Tdirection* t_theta, Tdirection* t_phi, Tchannel* t_R, Tchannel* t_G, Tchannel* t_B, double lambda); void fit_to_data(int n, Tdirection* t_theta, Tdirection* t_phi, Tchannel* t_R, Tchannel* t_G, Tchannel* t_B, double lambda, unsigned int threadId = 0);
template <typename Tdirection, typename Tchannel> template <typename Tdirection, typename Tchannel>
void fit_to_data(int n, Tdirection* t_x, Tdirection* t_y, Tdirection* t_z, Tchannel* t_R, Tchannel* t_G, Tchannel* t_B, double lambda); void fit_to_data(int n, Tdirection* t_x, Tdirection* t_y, Tdirection* t_z, Tchannel* t_R, Tchannel* t_G, Tchannel* t_B, double lambda, unsigned int threadId = 0);
private : private :
static int index (int l, int m) {return l*(l+1)+m;} static inline int index (int l, int m) { return l*(l+1)+m; }
// evaluation : // evaluation :
static void init_K_tab (); // compute the normalization constants K_l^m and store them into K_tab static void init_K_tab (); // compute the normalization constants K_l^m and store them into K_tab
static void compute_P_tab (Tscalar t); // Compute Legendre Polynomials at parameter t and store them in F_tab (only for m>=0) static void compute_P_tab (Tscalar t, unsigned int threadId); // Compute Legendre Polynomials at parameter t and store them in F_tab (only for m>=0)
static void compute_y_tab (Tscalar phi); // Compute the real basis functions y_l^m at (theta, phi) and store them in F_tab (compute_P_tab must have been called before) static void compute_y_tab (Tscalar phi, unsigned int threadId); // Compute the real basis functions y_l^m at (theta, phi) and store them in F_tab (compute_P_tab must have been called before)
const Tcoef& get_coef (int i) const {assert ((i>=0 && i<nb_coefs ) || !" maybe you forgot to call set_level()"); return coefs[i];} const Tcoef& get_coef (int i) const {assert ((i>=0 && i<nb_coefs ) || !" maybe you forgot to call set_level()"); return coefs[i];}
Tcoef& get_coef (int i) {assert ((i>=0 && i<nb_coefs ) || !" maybe you forgot to call set_level()"); return coefs[i];} Tcoef& get_coef (int i) {assert ((i>=0 && i<nb_coefs ) || !" maybe you forgot to call set_level()"); return coefs[i];}
......
...@@ -16,7 +16,7 @@ template <typename Tscalar,typename Tcoef> int SphericalHarmonics<Tscalar,Tcoef> ...@@ -16,7 +16,7 @@ template <typename Tscalar,typename Tcoef> int SphericalHarmonics<Tscalar,Tcoef>
template <typename Tscalar,typename Tcoef> unsigned long SphericalHarmonics<Tscalar,Tcoef>::cpt_instances = 0; template <typename Tscalar,typename Tcoef> unsigned long SphericalHarmonics<Tscalar,Tcoef>::cpt_instances = 0;
template <typename Tscalar,typename Tcoef> Tscalar SphericalHarmonics<Tscalar,Tcoef>::K_tab[(max_resolution+1)*(max_resolution+1)]; template <typename Tscalar,typename Tcoef> Tscalar SphericalHarmonics<Tscalar,Tcoef>::K_tab[(max_resolution+1)*(max_resolution+1)];
template <typename Tscalar,typename Tcoef> Tscalar SphericalHarmonics<Tscalar,Tcoef>::F_tab[(max_resolution+1)*(max_resolution+1)]; template <typename Tscalar,typename Tcoef> Tscalar SphericalHarmonics<Tscalar,Tcoef>::F_tab[32][(max_resolution+1)*(max_resolution+1)];
/************************************************************************* /*************************************************************************
...@@ -77,47 +77,52 @@ evaluation ...@@ -77,47 +77,52 @@ evaluation
**************************************************************************/ **************************************************************************/
template <typename Tscalar,typename Tcoef> template <typename Tscalar,typename Tcoef>
void SphericalHarmonics<Tscalar,Tcoef>::set_eval_direction (Tscalar theta, Tscalar phi) void SphericalHarmonics<Tscalar,Tcoef>::set_eval_direction (Tscalar theta, Tscalar phi, unsigned int threadId)
{ {
compute_P_tab(std::cos(theta)); assert(threadId < 32);
compute_y_tab(phi); compute_P_tab(std::cos(theta), threadId);
compute_y_tab(phi, threadId);
} }
template <typename Tscalar,typename Tcoef> template <typename Tscalar,typename Tcoef>
void SphericalHarmonics<Tscalar,Tcoef>::set_eval_direction (Tscalar x, Tscalar y, Tscalar z) void SphericalHarmonics<Tscalar,Tcoef>::set_eval_direction (Tscalar x, Tscalar y, Tscalar z, unsigned int threadId)
{ {
compute_P_tab(z); assert(threadId < 32);
compute_P_tab(z, threadId);
Tscalar phi (0); Tscalar phi (0);
if ((x*x + y*y) > 0.0) if ((x*x + y*y) > 0.0)
phi = atan2(y, x); phi = atan2(y, x);
compute_y_tab(phi); compute_y_tab(phi, threadId);
} }
template <typename Tscalar,typename Tcoef> template <typename Tscalar,typename Tcoef>
Tcoef SphericalHarmonics<Tscalar,Tcoef>::evaluate () const Tcoef SphericalHarmonics<Tscalar,Tcoef>::evaluate (unsigned int threadId) const
{ {
assert(threadId < 32);
Tcoef r (0); // (0.0,0.0,0.0); // TODO : use Tcoef (0) Tcoef r (0); // (0.0,0.0,0.0); // TODO : use Tcoef (0)
for (int i = 0; i < nb_coefs; i++) for (int i = 0; i < nb_coefs; i++)
{ {
r += coefs[i] * F_tab[i]; r += coefs[i] * F_tab[threadId][i];
} }
return r; return r;
} }
template <typename Tscalar,typename Tcoef> template <typename Tscalar,typename Tcoef>
Tcoef SphericalHarmonics<Tscalar,Tcoef>::evaluate_at (Tscalar theta, Tscalar phi) const Tcoef SphericalHarmonics<Tscalar,Tcoef>::evaluate_at (Tscalar theta, Tscalar phi, unsigned int threadId) const
{ {
set_eval_direction(theta, phi); assert(threadId < 32);
return evaluate(); set_eval_direction(theta, phi, threadId);
return evaluate(threadId);
} }
template <typename Tscalar,typename Tcoef> template <typename Tscalar,typename Tcoef>
Tcoef SphericalHarmonics<Tscalar,Tcoef>::evaluate_at (Tscalar x, Tscalar y, Tscalar z) const Tcoef SphericalHarmonics<Tscalar,Tcoef>::evaluate_at (Tscalar x, Tscalar y, Tscalar z, unsigned int threadId) const
{ {
set_eval_direction(x, y, z); assert(threadId < 32);
return evaluate(); set_eval_direction(x, y, z, threadId);
return evaluate(threadId);
} }
template <typename Tscalar,typename Tcoef> template <typename Tscalar,typename Tcoef>
...@@ -150,28 +155,28 @@ void SphericalHarmonics<Tscalar,Tcoef>::copy_K_tab (Tscalar tab[]) ...@@ -150,28 +155,28 @@ void SphericalHarmonics<Tscalar,Tcoef>::copy_K_tab (Tscalar tab[])
*/ */
template <typename Tscalar,typename Tcoef> template <typename Tscalar,typename Tcoef>
void SphericalHarmonics<Tscalar,Tcoef>::compute_P_tab (Tscalar t) void SphericalHarmonics<Tscalar,Tcoef>::compute_P_tab (Tscalar t, unsigned int threadId)
{ {
// if (t<0) {t=-t*t;} else {t=t*t;} // for plotting only : expand the param near equator // if (t<0) {t=-t*t;} else {t=t*t;} // for plotting only : expand the param near equator
F_tab[index(0,0)] = 1; F_tab[threadId][index(0,0)] = 1;
for (int l = 1; l <= resolution; l++) for (int l = 1; l <= resolution; l++)
{ {
F_tab[index(l,l)] = (1-2*l) * sqrt(1-t*t) * F_tab[index(l-1,l-1)]; // first diago F_tab[threadId][index(l,l)] = (1-2*l) * sqrt(1-t*t) * F_tab[threadId][index(l-1,l-1)]; // first diago
F_tab[index(l,l-1)] = t * (2*l-1) * F_tab[index(l-1,l-1)];// second diago F_tab[threadId][index(l,l-1)] = t * (2*l-1) * F_tab[threadId][index(l-1,l-1)];// second diago
for (int m = 0; m <= l-2; m++) for (int m = 0; m <= l-2; m++)
{// remaining of the line under the 2 diago {// remaining of the line under the 2 diago
F_tab[index(l,m)] = t * (2*l-1) / (float) (l-m) * F_tab[index(l-1,m)] - (l+m-1) / (float) (l-m) * F_tab[index(l-2,m)]; F_tab[threadId][index(l,m)] = t * (2*l-1) / (float) (l-m) * F_tab[threadId][index(l-1,m)] - (l+m-1) / (float) (l-m) * F_tab[threadId][index(l-2,m)];
} }
} }
} }
template <typename Tscalar,typename Tcoef> template <typename Tscalar,typename Tcoef>
void SphericalHarmonics<Tscalar,Tcoef>::compute_y_tab (Tscalar phi) void SphericalHarmonics<Tscalar,Tcoef>::compute_y_tab (Tscalar phi, unsigned int threadId)
{ {
for (int l = 0; l <= resolution; l++) for (int l = 0; l <= resolution; l++)
{ {
F_tab[index(l,0)] *= K_tab[index(l,0)]; // remove for plotting F_tab[threadId][index(l,0)] *= K_tab[index(l,0)]; // remove for plotting
} }
for (int m = 1; m <= resolution; m++) for (int m = 1; m <= resolution; m++)
...@@ -181,10 +186,10 @@ void SphericalHarmonics<Tscalar,Tcoef>::compute_y_tab (Tscalar phi) ...@@ -181,10 +186,10 @@ void SphericalHarmonics<Tscalar,Tcoef>::compute_y_tab (Tscalar phi)
for (int l = m; l <= resolution; l++) for (int l = m; l <= resolution; l++)
{ {
F_tab[index(l,m)] *= std::sqrt(2.0); // remove for plotting F_tab[threadId][index(l,m)] *= std::sqrt(2.0); // remove for plotting
F_tab[index(l,m)] *= K_tab[index(l,m)]; // remove for plotting F_tab[threadId][index(l,m)] *= K_tab[index(l,m)]; // remove for plotting
F_tab[index(l,-m)] = F_tab[index(l,m)] * sin_m_phi ; // store the values for -m<0 in the upper triangle