% Journal article: Masyutin (2015), issue 3(33), pp. 15--23.
% NOTE(review): the journal name was inferred from the URL host (bijournal.hse.ru) -- confirm.
% NOTE(review): the url value contains a literal space as exported -- verify the link resolves.
@article{26583204_162638914_2015,
  author      = {Masyutin, Alexey A.},
  title       = {Credit scoring based on social network data},
  journal     = {Business Informatics},
  year        = {2015},
  number      = {3(33)},
  pages       = {15--23},
  url         = {https://bijournal.hse.ru/en/2015--3(33) /162638914.html},
  keywords    = {credit scoring, social networks, probability of default, social data, Vkontakte},
  affiliation = {Alexey A. Masyutin - Post-graduate student, School of Data Analysis and Artificial Intelligence, Faculty of Computer Science, National Research University Higher School of Economics. Address: 20, Myasnitskaya str., Moscow, 101000, Russian Federation. E-mail: alexey.masyutin@gmail.com},
  abstract    = {Social networks accumulate huge amounts of information, which can provide valuable insights on people's behavior. In this paper, we use social data from Vkontakte, Russia's most popular social network, to discriminate between the solvent and delinquent debtors of credit organizations. Firstly, we present the datacenter architecture for social data retrieval. It has several functions, such as client matching, user profile parsing, API communication and data storing. Secondly, we develop two credit scorecards based exclusively on social data. The first scorecard uses the classical default definition: 90 days delinquency within 12 months since the loan origination. The second scorecard uses the classical fraud definition as falling into default within the first 3 months. Both scorecards undertake WOE-transformation of the input data and run logistic regression afterwards. The findings are as follows: social data better predict fraudulent cases rather than ordinary defaults, social data may be used to enrich the classical application scorecards. The performance of the scorecards is at the acceptable level, even though the input data used were exclusively from the social network. As soon as credit history (which usually serves as input data in the classical scorecards) is not rich enough for young clients, we find that the social data can bring value to the scoring systems performance. The paper is in the area of interest of banks and microfinance organizations.},
  annote      = {Social networks accumulate huge amounts of information, which can provide valuable insights on people's behavior. In this paper, we use social data from Vkontakte, Russia's most popular social network, to discriminate between the solvent and delinquent debtors of credit organizations. Firstly, we present the datacenter architecture for social data retrieval. It has several functions, such as client matching, user profile parsing, API communication and data storing. Secondly, we develop two credit scorecards based exclusively on social data. The first scorecard uses the classical default definition: 90 days delinquency within 12 months since the loan origination. The second scorecard uses the classical fraud definition as falling into default within the first 3 months. Both scorecards undertake WOE-transformation of the input data and run logistic regression afterwards. The findings are as follows: social data better predict fraudulent cases rather than ordinary defaults, social data may be used to enrich the classical application scorecards. The performance of the scorecards is at the acceptable level, even though the input data used were exclusively from the social network. As soon as credit history (which usually serves as input data in the classical scorecards) is not rich enough for young clients, we find that the social data can bring value to the scoring systems performance. The paper is in the area of interest of banks and microfinance organizations.},
}