BALL 1.5.0
poseClustering.h
Go to the documentation of this file.
1// -*- Mode: C++; tab-width: 2; -*-
2// vi: set ts=2:
3//
4
5#ifndef BALL_DOCKING_POSECLUSTERING_H
6#define BALL_DOCKING_POSECLUSTERING_H
7
8#ifndef BALL_DATATYPE_OPTIONS_H
10#endif
11
12#ifndef BALL_DOCKING_COMMON_CONFORMATIONSET_H
14#endif
15
16#ifndef BALL_MOLMEC_COMMON_SNAPSHOT_H
18#endif
19
20#ifndef BALL_STRUCTURE_ATOMBIJECTION_H
22#endif
23
24#ifndef BALL_KERNEL_SYSTEM_H
25# include <BALL/KERNEL/system.h>
26#endif
27
28#ifndef BALL_DATATYPE_STRING_H
29# include <BALL/DATATYPE/string.h>
30#endif
31
32#ifndef BALL_MATHS_VECTOR3_H
33# include <BALL/MATHS/vector3.h>
34#endif
35
36#ifndef BALL_MATHS_MATRIX44_H
37# include <BALL/MATHS/matrix44.h>
38#endif
39
40#include <Eigen/Core>
41
42#include <boost/shared_ptr.hpp>
43#include <boost/variant.hpp>
44
45// Boost 1.56+ implicitly deletes the copy constructors of stored_edge_properties. In order
46// to automatically use the respective move constructor instead, ClusterProperties has to
47// be _nothrow_ move constructible!
48#ifndef BALL_HAS_NOEXCEPT
49# define BOOST_NO_CXX11_DEFAULTED_FUNCTIONS
50# include <boost/graph/adjacency_list.hpp>
51# undef BOOST_NO_CXX11_DEFAULTED_FUNCTIONS
52#else
53# include <boost/graph/adjacency_list.hpp>
54#endif
55
56#include <set>
57#include <iostream>
58
59#ifdef BALL_HAS_TBB
60# include <tbb/parallel_reduce.h>
61# include <tbb/blocked_range.h>
62#endif
63
64//#define POSECLUSTERING_DEBUG 1
65#undef POSECLUSTERING_DEBUG
66
67namespace BALL
68{
127 {
128 public:
134 {
137 static const String CLUSTER_METHOD;
138
142
146
149 static const String RMSD_TYPE;
150
153 static const String RUN_PARALLEL;
154
155 };
156
159 {
160 static const Index CLUSTER_METHOD;
161 static const float DISTANCE_THRESHOLD;
163 static const Index RMSD_TYPE;
164 static const bool RUN_PARALLEL;
166 };
167
168 enum BALL_EXPORT RMSDType
169 {
172 CENTER_OF_MASS_DISTANCE
173 };
174
175 enum BALL_EXPORT RMSDLevelOfDetail
176 {
181 PROPERTY_BASED_ATOM_BIJECTION
182 };
183
184 enum BALL_EXPORT ClusterMethod
185 {
190 CLINK_ALTHAUS
191 };
192
194 {
195 public:
197
198 RigidTransformation(Eigen::Vector3f const& new_trans, Eigen::Matrix3f const& new_rot)
199 : translation(new_trans),
200 rotation(new_rot)
201 {}
202
203 Eigen::Vector3f translation;
204 Eigen::Matrix3f rotation;
205 };
206
212 {
213 public:
214 PosePointer(RigidTransformation const* new_trafo, SnapShot const* new_snap = 0)
215 : trafo(new_trafo),
216 snap(new_snap)
217 { }
218
219 PosePointer(SnapShot const* new_snap)
220 : trafo(0),
221 snap(new_snap)
222 { }
223
226 };
227
229 {
230 public:
233
235
237 ClusterProperties& operator=(ClusterProperties&&) BALL_NOEXCEPT;
238
241 template <class Archive>
242 void serialize(Archive& ar, const unsigned int version);
243
246 std::set<Index> poses;
247
250 Size size;
251
260 boost::variant<Eigen::VectorXf, RigidTransformation> center;
261
264 float merged_at;
265#ifdef POSECLUSTERING_DEBUG
268 float current_cluster_id;
269#endif
270 };
271
272 typedef boost::adjacency_list<boost::vecS,
273 boost::vecS,
274 boost::directedS,
276 boost::no_property,
277 unsigned int> ClusterTree;
278
279 typedef ClusterTree::vertex_descriptor ClusterTreeNode;
280
282
285
288
291 PoseClustering(ConformationSet* poses, float rmsd);
292
294 PoseClustering(System const& base_system, String transformation_file_name);
295
299
300
303
306 bool compute();
307
309
313
315 void setConformationSet(ConformationSet* new_set, bool precompute_atombijection = false);
316
321 void setBaseSystemAndPoses(System const& base_system, std::vector<PosePointer> const& poses);
322
325 // will be applied upon the current conformation
326 void setBaseSystemAndTransformations(System const& base_system, String transformation_file_name);
327
329 const ConformationSet* getConformationSet() const {return current_set_;}
330
332 ConformationSet* getConformationSet() {return current_set_;}
333
335 const std::vector<RigidTransformation> & getRigidTransformations() const {return transformations_;}
336
338 std::vector<Vector3> & getCentersOfMass() {return com_;}
339
341 std::vector<Vector3> const & getCentersOfMass() const {return com_;}
342
344 const System& getSystem() const;
345
348
350 Size getNumberOfPoses() const {return poses_.size();}
351
353 Size getNumberOfClusters() const {return clusters_.size();}
354
357 const std::set<Index>& getCluster(Index i) const;
358
361 std::set<Index>& getCluster(Index i);
362
365
367 float getClusterScore(Index i) const;
368
370 float getScore(const System sys_a, const System sys_b, Options options) const;
371
373 AtomBijection& getAtomBijection() {return atom_bijection_;}
374
376 AtomBijection const& getAtomBijection() const {return atom_bijection_;}
377
379 void applyTransformation2System(Index i, System& target_system);
380
383
386
388 float computeCompleteLinkageRMSD(Index i, Options options, bool initialize = true);
389
391 //float computeCompleteLinkageRMSD(boost::shared_ptr<ConformationSet> cluster, Option options) const;
392
394 boost::shared_ptr<System> getPose(Index i) const;
395
397 std::vector<PosePointer> const& getPoses() const {return poses_;}
398
400 boost::shared_ptr<System> getClusterRepresentative(Index i);
401
404
406 boost::shared_ptr<ConformationSet> getClusterConformationSet(Index i);
407
409 boost::shared_ptr<ConformationSet> getReducedConformationSet();
410
424 bool refineClustering(Options const& refined_options);
425
427
433
437
439
442
448 static float getRigidRMSD(Eigen::Vector3f const& t_ab, Eigen::Matrix3f const& M_ab, Eigen::Matrix3f const& covariance_matrix);
449
455 static float getSquaredRigidRMSD(Eigen::Vector3f const& t_ab, Eigen::Matrix3f const& M_ab, Eigen::Matrix3f const& covariance_matrix);
456
459 static Eigen::Matrix3f computeCovarianceMatrix(System const& system, Index rmsd_level_of_detail = C_ALPHA);
460
462
465
471 std::vector<std::set<Index> > extractClustersForThreshold(float threshold, Size min_size = 0);
472
476 std::vector<std::set<Index> > extractNBestClusters(Size n);
477
481 std::vector<std::set<Index> > filterClusters(Size min_size = 1);
482
485 void serializeWardClusterTree(std::ostream& out, bool binary = false);
486
489 void deserializeWardClusterTree(std::istream& in, bool binary = false);
490
493 void exportWardClusterTreeToGraphViz(std::ostream& out);
494
497 void exportClusterTreeToJSON(std::ostream& out);
498
500
501
504 void printClusters(std::ostream& out = std::cout) const;
505
508 void printClusterScores(std::ostream& out = std::cout);
509
510
511 protected:
512
513#ifdef BALL_HAS_TBB
516 class ComputeNearestClusterTask_
517 {
518 public:
520 ComputeNearestClusterTask_(PoseClustering* parent,
521 const std::vector<ClusterTreeNode>& active_clusters,
522 Position current_cluster,
523 Index rmsd_type);
524
526 ComputeNearestClusterTask_(ComputeNearestClusterTask_& cnct, tbb::split);
527
529 void join(ComputeNearestClusterTask_ const& cnct);
530
532 void operator() (const tbb::blocked_range<size_t>& r);
533
535 Position getMinIndex() {return my_min_index_;}
536
538 float getMinValue() {return my_min_value_;}
539
540 protected:
541 // the PoseClustering instance that called us
542 PoseClustering* parent_;
543
544 // the array we work on
545 const std::vector<ClusterTreeNode>& active_clusters_;
546
547 // the cluster to compare to everything else
548 Position current_cluster_;
549
550 // the kind of rmsd computation desired
551 Index rmsd_type_;
552
553 // the minimum index in our own block
554 Position my_min_index_;
555
556 // the minimum value in our own block
557 float my_min_value_;
558 };
559#endif
560
564 {
565 public:
566 ClusterTreeWriter_(ClusterTree const* cluster_tree)
567 : cluster_tree_(cluster_tree)
568 { }
569
570 void operator() (std::ostream& out, const ClusterTreeNode& v) const;
571
572 protected:
574 };
575
579 {
580 public:
582 : cluster_tree_(&cluster_tree)
583 {}
584
585 bool operator() (ClusterTreeNode const first, ClusterTreeNode const second) const
586 {
587 float first_value = (*cluster_tree_)[ first].merged_at;
588 float second_value = (*cluster_tree_)[second].merged_at;
589
590 return first_value < second_value;
591 }
592
593 protected:
595 };
596
597 // trivial complete linkage implementation
598 // with O(n^2) space request
600
601 // space efficient (SLINK or CLINK) clustering
603
604 //
606
607 // implementation of a single linkage clustering as described in
608 // R. Sibson: SLINK: an optimally efficient algorithm for the single-link cluster method.
609 // The Computer Journal. 16, 1, British Computer Society, 1973, p. 30-34
610 void slinkInner_(int current_level);
611
612 // implementation of a complete linkage clustering as described in
613 // D. Defays: An efficient algorithm for a complete link method.
614 // The Computer Journal. 20, 4, British Computer Society, 1977, p. 364-366.
615 void clinkInner_(int current_level);
616
617 // implememtation of the nearest neighbor chain clustering algorithm
618 // as described in:
619 // Murtagh, Fionn (1983): "A survey of recent advances in hierarchical clustering algorithms",
620 // The Computer Journal 26 (4): 354–359
622
623 void initWardDistance_(Index rmsd_type);
624
626
628
629 std::set<Index> collectClusterBelow_(ClusterTreeNode const& v);
630
631 // compute the center of masses
633
634 // precompute an atom bijection for faster access
636
637 // check the given atom wrt choice of option RMSD_LEVEL_OF_DETAIL
638 bool static isExcludedByLevelOfDetail_(Atom const* atom, Index rmsd_level_of_detail);
639
640 // distance between cluster i and cluster j
641 float getClusterRMSD_(Index i, Index j, Index rmsd_type);
642
643 // reads the poses given as transformations from a file
644 // Note: the previously given system will be taken
645 // as untransformed reference, e.g. all transformations
646 // will be applied upon the current conformation
648
649 // compute the RMSD between two "poses"
650 float getRMSD_(Index i, Index j, Index rmsd_type);
651
652 // store pointers to the snapshots in the poses vector
654
655 //
656 void printCluster_(Size i, std::ostream& out = std::cout) const;
657
658 //
659 void printVariables_(int a, int b, double c, int d, double e, int current_level);
660
661 //
662 void clear_();
663
664 // Used by exportClusterTreeToJSON
665 void exportToJSONDFS_(ClusterTreeNode const& current, String& result);
666
667 // only used by trivial clustering
668 Eigen::MatrixXd pairwise_scores_;
669
672
674 std::vector< std::set<Index> > clusters_;
675
676 std::vector< Index > cluster_representatives_;
677
679 std::vector< float > cluster_scores_;
680
683
684
685 // ----- unified access to the poses, independent of their type
686 // (the poses are either stored in transformations_, or in current_set_)
687 std::vector<PosePointer> poses_;
688
689 // ----- data structure for transformation input (instead of snapshots)
690 std::vector<RigidTransformation> transformations_;
691
692 Eigen::Matrix3f covariance_matrix_;
693
694 // TODO: maybe use a const - ptr instead?
696
697 // the reference state
699
700 // flag indicating the use of transformation as input
702
703 // do we need to delete the conformation set, that was
704 // created by converting transformations to snapshots
706
707 // ------ data structures for slink and clink
708
709 // stores the distance at which this indexed element has longer
710 // the largest index of its cluster
711 std::vector<double> lambda_;
712
713 // the index of the cluster representative at merge-time
714 // (element with largest index)
715 std::vector<int> pi_;
716
717 std::vector<double> mu_;
718
719
720 // ----- data structures for nearest neighbor chain ward
722
723 // ----- data structure for CENTER_OF_GRAVITY_CLINK
724
725 // the geometric center of mass
726 std::vector<Vector3> com_;
727
728 // ----- general data structures
729
730 // We cache the atom bijection for faster
731 // RMSD computation; this is possible, since the system topology does
732 // not change
734
735 // helper dummies to speed up snapshot application
738
741 }; //class PoseClustering
742} //namesspace BALL
743
744#endif // BALL_DOCKING_POSECLUSTERING_H
SNAPSHOT_RMSD
C_ALPHA
RIGID_RMSD
ALL_ATOMS
BACKBONE
TRIVIAL_COMPLETE_LINKAGE
NEAREST_NEIGHBOR_CHAIN_WARD
SLINK_SIBSON
HEAVY_ATOMS
CLINK_DEFAYS
STL namespace.
Definition: constants.h:13
BALL_SIZE_TYPE Position
BALL_INDEX_TYPE Index
BALL_EXTERN_VARIABLE const double c
Definition: constants.h:149
Computation of clusters of docking poses.
void deserializeWardClusterTree(std::istream &in, bool binary=false)
float computeWardDistance_(ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type)
boost::shared_ptr< System > getClusterRepresentative(Index i)
returns the "central cluster" conformation of cluster i as system
AtomBijection const & getAtomBijection() const
returns a const reference to the cached AtomBijection
float getClusterScore(Index i) const
returns the score of cluster i
std::vector< PosePointer > poses_
Options options
options
void clinkInner_(int current_level)
void setConformationSet(ConformationSet *new_set, bool precompute_atombijection=false)
sets the poses to be clustered, the conformation set's reference system will the base system
bool nearestNeighborChainCompute_()
Eigen::Matrix3f covariance_matrix_
ConformationSet * getConformationSet()
returns the poses to be clustered as ConformationSet
void exportToJSONDFS_(ClusterTreeNode const &current, String &result)
void precomputeAtomBijection_()
std::vector< int > pi_
AtomBijection atom_bijection_
bool readTransformationsFromFile_(String filename)
const std::vector< RigidTransformation > & getRigidTransformations() const
returns the poses as rigid transformations
const ConformationSet * getConformationSet() const
returns the poses to be clustered as ConformationSet
boost::shared_ptr< ConformationSet > getReducedConformationSet()
returns a ConformationSet containing one structure per cluster
float getScore(const System sys_a, const System sys_b, Options options) const
returns the score between two poses given as systems
void setBaseSystemAndTransformations(System const &base_system, String transformation_file_name)
void exportWardClusterTreeToGraphViz(std::ostream &out)
float getRMSD_(Index i, Index j, Index rmsd_type)
virtual ~PoseClustering()
Index findClusterRepresentative(Index i)
returns the index of the cluster representative
std::vector< float > cluster_scores_
the scores of the clusters
void updateWardDistance_(ClusterTreeNode parent, ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type)
static float getRigidRMSD(Eigen::Vector3f const &t_ab, Eigen::Matrix3f const &M_ab, Eigen::Matrix3f const &covariance_matrix)
std::set< Index > collectClusterBelow_(ClusterTreeNode const &v)
void applyTransformation2System(Index i, System &target_system)
apply a transformation to a given system
std::vector< std::set< Index > > extractClustersForThreshold(float threshold, Size min_size=0)
System & getSystem()
returns the reference pose
boost::shared_ptr< ConformationSet > getClusterConformationSet(Index i)
returns cluster i as ConformationSet
void exportClusterTreeToJSON(std::ostream &out)
void serializeWardClusterTree(std::ostream &out, bool binary=false)
void initWardDistance_(Index rmsd_type)
std::vector< Vector3 > const & getCentersOfMass() const
returns the centers of mass-vector, const version (non-empty only for CENTER_OF_MASS_DISTANCE)
std::vector< std::set< Index > > filterClusters(Size min_size=1)
std::vector< PosePointer > const & getPoses() const
returns poses as PosePointer
PoseClustering()
Default constructor.
boost::adjacency_list< boost::vecS, boost::vecS, boost::directedS, ClusterProperties, boost::no_property, unsigned int > ClusterTree
Eigen::MatrixXd pairwise_scores_
boost::shared_ptr< System > getPose(Index i) const
returns the complete linkage RMSD of a pose set
PoseClustering(ConformationSet *poses, float rmsd)
void setBaseSystemAndPoses(System const &base_system, std::vector< PosePointer > const &poses)
const std::set< Index > & getCluster(Index i) const
std::vector< double > lambda_
std::vector< RigidTransformation > transformations_
ConformationSet * current_set_
the ConformationSet we wish to cluster
PoseClustering(System const &base_system, String transformation_file_name)
PoseClustering for a given set of rigid transformations of a base structure.
Index rmsd_level_of_detail_
the RMSD definition used for clustering
void storeSnapShotReferences_()
BALL_CREATE(PoseClustering)
float getClusterRMSD_(Index i, Index j, Index rmsd_type)
ClusterTree::vertex_descriptor ClusterTreeNode
Size getNumberOfPoses() const
returns the number of poses
void printClusterScores(std::ostream &out=std::cout)
bool refineClustering(Options const &refined_options)
void convertTransformations2Snaphots()
convert the poses to SnapShots
std::set< Index > & getCluster(Index i)
std::vector< double > mu_
void slinkInner_(int current_level)
Size getNumberOfClusters() const
returns the number of clusters found
std::vector< Index > cluster_representatives_
const System & getSystem() const
returns the reference pose
std::vector< Vector3 > & getCentersOfMass()
returns the centers of mass-vector (non-empty only for CENTER_OF_MASS_DISTANCE)
std::vector< std::set< Index > > extractNBestClusters(Size n)
static float getSquaredRigidRMSD(Eigen::Vector3f const &t_ab, Eigen::Matrix3f const &M_ab, Eigen::Matrix3f const &covariance_matrix)
void convertSnaphots2Transformations()
convert the poses to rigid transformations
Size getClusterSize(Index i) const
returns the size of cluster i
std::vector< std::set< Index > > clusters_
the clusters: sets of pose indices
std::vector< Vector3 > com_
AtomBijection & getAtomBijection()
returns a reference to the cached AtomBijection
ClusterTree cluster_tree_
The tree built during hierarchical clustering.
float computeCompleteLinkageRMSD(Index i, Options options, bool initialize=true)
returns the complete linkage RMSD of cluster i
void printClusters(std::ostream &out=std::cout) const
static Eigen::Matrix3f computeCovarianceMatrix(System const &system, Index rmsd_level_of_detail=C_ALPHA)
static bool isExcludedByLevelOfDetail_(Atom const *atom, Index rmsd_level_of_detail)
void printVariables_(int a, int b, double c, int d, double e, int current_level)
void printCluster_(Size i, std::ostream &out=std::cout) const
static const String RUN_PARALLEL
static const String DISTANCE_THRESHOLD
static const String RMSD_TYPE
static const String CLUSTER_METHOD
static const String RMSD_LEVEL_OF_DETAIL
Default values for options.
static const float DISTANCE_THRESHOLD
static const bool USE_CENTER_OF_MASS_PRECLINK
static const bool RUN_PARALLEL
static const Index RMSD_TYPE
static const Index RMSD_LEVEL_OF_DETAIL
static const Index CLUSTER_METHOD
RigidTransformation(Eigen::Vector3f const &new_trans, Eigen::Matrix3f const &new_rot)
PosePointer(RigidTransformation const *new_trafo, SnapShot const *new_snap=0)
RigidTransformation const * trafo
PosePointer(SnapShot const *new_snap)
ClusterProperties & operator=(const ClusterProperties &)
ClusterProperties(ClusterProperties &&) BALL_NOEXCEPT
ClusterProperties(const ClusterProperties &)
ClusterTreeWriter_(ClusterTree const *cluster_tree)
ClusterTreeNodeComparator(ClusterTree &cluster_tree)
#define BALL_EXPORT
Definition: COMMON/global.h:50