@article{Ziauddin2017,
abstract = {In recent years, the data warehouse industry has witnessed decreased use of indexing but increased use of compression and clustering of data, facilitating efficient data access and data pruning in the query processing area. A classic example of data pruning is partition pruning, which is used when table data is range or list partitioned. But lately, techniques have been developed to prune data at a lower granularity than a table partition or sub-partition. A good example is the use of a data-pruning structure called a zone map. A zone map prunes zones of data from a table on which it is defined. Data pruning via zone maps is very effective when the table data is clustered by the filtering columns. The database industry has offered support for clustering data in tables by their local columns, and for defining zone maps on the clustering columns of such tables. This has helped improve the performance of queries that contain filter predicates on local columns. However, queries in data warehouses are typically based on star/snowflake schemas, with filter predicates usually on columns of the dimension tables joined to a fact table. Given this, the performance of data warehouse queries can be significantly improved if the fact table data is clustered by columns of dimension tables, together with zone maps that maintain min/max value ranges of these clustering columns over zones of fact table data. In recognition of this opportunity to significantly improve the performance of data warehouse queries, Oracle 12c Release 1 has introduced support for dimension-based clustering of fact tables together with data pruning of the fact tables via dimension-based zone maps.},
annote = {zone maps / oracle},
author = {Ziauddin, Mohamed and Witkowski, Andrew and Kim, You Jung and Potapov, Dmitry and Lahorani, Janaki and Krishna, Murali},
doi = {10.14778/3137765.3137769},
issn = {2150-8097},
journal = {Proceedings of the VLDB Endowment},
number = {12},
pages = {1622--1633},
title = {{Dimensions based data clustering and zone maps}},
volume = {10},
year = {2017}
}
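
A quick illustration of the zone-map mechanism the Ziauddin et al. abstract describes: each zone records the min/max of a run of rows, and a filter predicate skips any zone whose range cannot match. The Rust sketch below is a hedged toy under assumed simplifications (a single i64 column, a fixed zone size, an equality predicate only), not Oracle's implementation.

// Minimal zone-map sketch: per-zone min/max over fixed-size runs of rows.
struct Zone {
    min: i64,
    max: i64,
    row_range: std::ops::Range<usize>, // rows covered by this zone
}

fn build_zone_map(column: &[i64], zone_size: usize) -> Vec<Zone> {
    column
        .chunks(zone_size)
        .enumerate()
        .map(|(i, chunk)| Zone {
            min: *chunk.iter().min().unwrap(),
            max: *chunk.iter().max().unwrap(),
            row_range: i * zone_size..i * zone_size + chunk.len(),
        })
        .collect()
}

// Return only the row ranges whose zones may contain `value`;
// every other zone is pruned without touching its rows.
fn prune_eq(zones: &[Zone], value: i64) -> Vec<std::ops::Range<usize>> {
    zones
        .iter()
        .filter(|z| z.min <= value && value <= z.max)
        .map(|z| z.row_range.clone())
        .collect()
}

fn main() {
    // Clustered data keeps per-zone ranges tight, so pruning is effective.
    let column: Vec<i64> = (0..1_000).collect();
    let zones = build_zone_map(&column, 100);
    // Only one of ten zones survives the predicate `col = 250`.
    assert_eq!(prune_eq(&zones, 250), vec![200..300]);
}

With clustered data the per-zone ranges barely overlap, which is why the paper pairs zone maps with dimension-based clustering of the fact table.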
@inproceedings{Moerkotte1998,
abstract = {Small Materialized Aggregates (SMAs for short) are considered a highly flexible and versatile alternative to materialized data cubes. The basic idea is to compute many aggregate values for small to medium-sized buckets of tuples. These aggregates are then used to speed up query processing. We present the general idea and an application of SMAs to the TPC-D benchmark. We show that applying SMAs to TPC-D Query 1 results in a speed-up of two orders of magnitude. Then, we elaborate on the problem of query processing in the presence of SMAs. Finally, we briefly discuss some further tuning possibilities for SMAs.},
author = {Moerkotte, Guido},
isbn = {1-55860-566-5},
booktitle = {Proceedings of the 24th International Conference on Very Large Data Bases},
pages = {476--487},
title = {{Small Materialized Aggregates: A Light Weight Index Structure for Data Warehousing}},
url = {http://dl.acm.org/citation.cfm?id=645924.671173},
year = {1998}
}
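
The SMA idea in Moerkotte's abstract can be sketched the same way: per-bucket count/sum/min/max aggregates answer an aggregate query mostly from the aggregates themselves, scanning only buckets that straddle the predicate range. The Rust below is a minimal illustration with an assumed bucket size and a SUM-over-range query shape, not the paper's system.

// Per-bucket "small materialized aggregates" over a numeric column.
#[derive(Clone, Copy)]
struct Sma {
    count: usize,
    sum: i64,
    min: i64,
    max: i64,
}

fn build_smas(column: &[i64], bucket_size: usize) -> Vec<Sma> {
    column
        .chunks(bucket_size)
        .map(|b| Sma {
            count: b.len(),
            sum: b.iter().sum(),
            min: *b.iter().min().unwrap(),
            max: *b.iter().max().unwrap(),
        })
        .collect()
}

// SUM(col) WHERE lo <= col <= hi: buckets fully inside the range
// contribute their precomputed sum; only straddling buckets are scanned.
fn sum_range(column: &[i64], smas: &[Sma], bucket_size: usize, lo: i64, hi: i64) -> i64 {
    let mut total = 0;
    for (i, sma) in smas.iter().enumerate() {
        if sma.max < lo || sma.min > hi {
            continue; // pruned: no qualifying row in this bucket
        }
        if lo <= sma.min && sma.max <= hi {
            total += sma.sum; // answered from the aggregate alone
        } else {
            let start = i * bucket_size;
            let end = (start + bucket_size).min(column.len());
            total += column[start..end].iter().filter(|&&v| lo <= v && v <= hi).sum::<i64>();
        }
    }
    total
}

fn main() {
    let col: Vec<i64> = (0..1_000).collect();
    let smas = build_smas(&col, 100);
    // Exact answer; only the two straddling buckets are actually scanned.
    assert_eq!(sum_range(&col, &smas, 100, 150, 849), (150..=849).sum::<i64>());
}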
@article{Nica2017,
abstract = {We introduce a new concept of leveraging traditional data statistics as dynamic data integrity constraints. These data statistics produce transient database constraints, which are valid as long as they can be proven to be consistent with the current data. We denote this type of data statistics by constraint data statistics, the properties needed for their consistency checking by consistency metadata, and their implied integrity constraints by implied data statistics constraints (implied constraints for short). Implied constraints are valid integrity constraints which are powerful query optimization tools employed, just as traditional database constraints, in semantic query transformation (aka query reformulation), partition pruning, runtime optimization, and semi-join reduction, to name a few. To our knowledge, this is the first work introducing this novel and powerful concept of deriving implied integrity constraints from data statistics. We discuss theoretical aspects of the constraint data statistics concept and their integration into query processing. We present the current architecture of data statistics management in SAP HANA and detail how constraint data statistics are designed and integrated into this architecture. As an instantiation of this framework, we consider dynamic partition pruning for data aging scenarios. We discuss our current implementation of constraint data statistics objects in SAP HANA which can be used for dynamic partition pruning. We enumerate their properties and show how consistency checking for implied integrity constraints is supported in the data statistics architecture. Our experimental evaluations on the TPC-H benchmark and a real customer application confirm the effectiveness of the implied integrity constraints: (1) for 59% of TPC-H queries, constraint data statistics utilization results in pruning cold partitions and reducing memory consumption, and (2) we observe up to three orders of magnitude speed-up in query processing time for a real customer running an S/4HANA application.},
author = {Nica, Anisoara and Sherkat, Reza and Andrei, Mihnea and Cheng, Xun and Heidel, Martin and Bensberg, Christian and Gerwens, Heiko},
doi = {10.14778/3137765.3137772},
issn = {2150-8097},
journal = {Proceedings of the VLDB Endowment},
number = {12},
pages = {1658--1669},
title = {{Statisticum: Data statistics management in SAP HANA}},
volume = {10},
year = {2017}
}
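
The "implied constraints" in the Statisticum abstract can be illustrated similarly: a min/max statistic may be used for pruning only while it is provably consistent with the current data, so any modification invalidates it until it is rebuilt. The Rust sketch below invents a trivial consistency flag for illustration; SAP HANA's actual consistency metadata and interfaces are more involved and are not shown here.

// A min/max data statistic treated as a transient ("implied") constraint.
struct ConstraintStat {
    min: i64,
    max: i64,
    consistent: bool, // consistency metadata, cleared on modification
}

struct Partition {
    rows: Vec<i64>,
    stat: ConstraintStat,
}

impl Partition {
    fn rebuild_stat(&mut self) {
        self.stat = ConstraintStat {
            min: self.rows.iter().copied().min().unwrap_or(i64::MAX),
            max: self.rows.iter().copied().max().unwrap_or(i64::MIN),
            consistent: true,
        };
    }

    fn insert(&mut self, v: i64) {
        self.rows.push(v);
        self.stat.consistent = false; // implied constraint no longer provable
    }

    // Partition pruning for `WHERE col = v`: the implied constraint
    // min <= col <= max may only be trusted while it is consistent.
    fn can_prune_eq(&self, v: i64) -> bool {
        self.stat.consistent && (v < self.stat.min || v > self.stat.max)
    }
}

fn main() {
    let mut p = Partition {
        rows: (0..100).collect(),
        stat: ConstraintStat { min: 0, max: 0, consistent: false },
    };
    p.rebuild_stat();
    assert!(p.can_prune_eq(500)); // 500 > max = 99, safe to skip this partition
    p.insert(500);
    assert!(!p.can_prune_eq(500)); // statistic is stale, must not prune
    p.rebuild_stat();
    assert!(!p.can_prune_eq(500)); // 500 now falls inside [0, 500]
}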