% Cleaned publisher export: one merged keywords field, bare DOI, en-dash page
% range, real case-protecting braces for acronyms, ASCII-safe author names.
@article{Yildirim2011396,
  author   = {Y{\i}ld{\i}r{\i}m, Ahmet Artu and {\"O}zdo{\u{g}}an, Cem},
  title    = {Parallel wavelet-based clustering algorithm on {GPUs} using {CUDA}},
  journal  = {Procedia Computer Science},
  volume   = {3},
  pages    = {396--400},
  year     = {2011},
  note     = {World Conference on Information Technology},
  issn     = {1877-0509},
  doi      = {10.1016/j.procs.2010.12.066},
  url      = {http://www.sciencedirect.com/science/article/pii/S1877050910004412},
  keywords = {GPU computing, CUDA, Cluster analysis, WaveCluster algorithm},
  abstract = {There has been a substantial interest in scientific and engineering computing community to speed up the CPU-intensive tasks on graphical processing units (GPUs) with the development of many-core GPUs as having very large memory bandwidth and computational power. Cluster analysis is a widely used technique for grouping a set of objects into classes of ``similar'' objects and commonly used in many fields such as data mining, bioinformatics and pattern recognition. WaveCluster defines the notion of cluster as a dense region consisting of connected components in the transformed feature space. In this study, we present the implementation of WaveCluster algorithm as a novel clustering approach based on wavelet transform to GPU level parallelization and investigate the parallel performance for very large spatial datasets. The CUDA implementations of two main sub-algorithms of WaveCluster approach; namely extraction of low-frequency component from the signal using wavelet transform and connected component labeling are presented. Then, the corresponding performance evaluations are reported for each sub-algorithm. Divide and conquer approach is followed on the implementation of wavelet transform and multi-pass sliding window approach on the implementation of connected component labeling. The maximum achieved speedup is found in kernel as 107x in the computation of extraction of the low-frequency component and 6x in the computation of connected component labeling with respect to the sequential algorithms running on the CPU.},
}

