% Journal article: M. A. Orlov (2014), An algorithm for multicriteria stratification.
% Cleaned from a machine export; the citation key is kept unchanged so existing \cite commands resolve.
@article{26583204_143012675_2014,
  author        = {Orlov, Mikhail A.},
  title         = {An algorithm for multicriteria stratification},
  journal       = {Business Informatics},
  year          = {2014},
  number        = {4 (30)},
  pages         = {24--35},
  url           = {https://bijournal.hse.ru/en/2014--4%20(30)/143012675.html},
  keywords      = {stratification, rank aggregation, multicriteria, weighted sum, quadratic programming, optimization, bibliometrics},
  abstract      = {This paper elaborates an approach to the problem of multicriteria ranking referred to as multicriteria stratification. The target of stratification is an ordered partition with predefined number of classes -- strata rather than a complete ranking of the set of objects. Ranking is computed by means of linear convolution of criteria with some weights. These weights are based on assumption that data can fit some linear structure so that ``parallel'' layers can be identified -- strata. In the paper [6] the authors formulated the problem of multicriteria stratification as a task of minimization of a cost function depending on criteria weights; however the algorithm proposed in that paper to address the emerging task based on random searching has demonstrated low performance in comparison to some other stratification approaches. In this paper a new algorithm based on quadratic programming is proposed to optimize the multicriteria stratification target function. A more sophisticated synthetic data generator for a comparative study of the stratification algorithm has been developed. The new data generator has more parameters to tune and allows more flexible control of geometry of synthetic strata: orientation, thickness, spread and intensity of layers that enables to pay due regard to real data structure. The novel algorithm has been compared experimentally with existing stratification approaches by involving synthetic data, and its competitiveness has been shown in the majority of case studies. Two real-world datasets have been processed -- bibliometrical indicators of 118 scientific journals and parameters of publication activities of 102 countries. The new algorithm applied to handle these data has produced sensible and well interpretable outputs. Furthermore, on these data the proposed algorithm found the most coherent multicriteria stratification to those computed by each single criterion.},
  affiliation   = {Post-graduate Student, Department of Data Analysis and Artificial Intelligence, Faculty of Computer Science, National Research University Higher School of Economics. Address: 20, Myasnitskaya street, Moscow, 101000, Russian Federation. E-mail: ormian@mail.ru},
  internal-note = {NOTE(review): journal name was empty in the export and is inferred from the bijournal.hse.ru URL -- confirm. The annote field was an exact duplicate of the abstract and has been dropped.},
}