% Poulson, Marker, van de Geijn, Hammond, Romero -- "Elemental" framework
% paper, ACM Transactions on Mathematical Software 39(2), February 2013.
% NOTE(review): the citation key carries 2012 while `year` is 2013; this
% presumably reflects the acceptance date recorded in `accepted` -- the key
% is kept unchanged so existing citations keep resolving.
% The `doi` field is the bare DOI taken from the resolver URL below.
@Article{Poulson:2012:ENF,
  author   = {Poulson, Jack and
              Marker, Bryan and
              van de Geijn, Robert A. and
              Hammond, Jeff R. and
              Romero, Nichols A.},
  title    = {{Elemental}: A New Framework for Distributed Memory Dense
              Matrix Computations},
  journal  = {{ACM} Transactions on Mathematical Software},
  volume   = 39,
  number   = 2,
  month    = feb,
  year     = 2013,
  pages    = {13:1--13:24},
  doi      = {10.1145/2427023.2427030},
  url      = {http://doi.acm.org/10.1145/2427023.2427030},
  accepted = {14 February 2012},
  abstract = {Parallelizing dense matrix computations to distributed memory
              architectures is a well-studied subject and generally
              considered to be among the best understood domains of parallel
              computing. Two packages, developed in the mid 1990s, still
              enjoy regular use: ScaLAPACK and PLAPACK. With the advent of
              many-core architectures, which may very well take the shape of
              distributed memory architectures within a single processor,
              these packages must be revisited since the traditional
              MPI-based approaches will likely need to be extended. Thus,
              this is a good time to review lessons learned since the
              introduction of these two packages and to propose a simple yet
              effective alternative. Preliminary performance results show
              the new solution achieves competitive, if not superior,
              performance on large clusters.},
}