% Conference paper "Bias in Word Embeddings" (FAT* 2020 proceedings).
% The citation key is kept exactly as exported so existing \cite commands still resolve.
% Conference name and dates are stored in eventtitle/eventdate and the copyright
% notice in an unprinted copyright field, instead of a printed note field.
% The exported series field only repeated booktitle and has been dropped.
% NOTE(review): the third author's surname split (Serrano vs. a compound surname)
% is kept as exported -- confirm against the published paper.
@inproceedings{f238ad7ed6f54e9c94c6f510e86add06,
  author     = {Papakyriakopoulos, Orestis and Hegelich, Simon and Serrano, Juan Carlos Medina and Marco, Fabienne},
  title      = {Bias in Word Embeddings},
  booktitle  = {{FAT*} 2020 -- Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency},
  publisher  = {Association for Computing Machinery, Inc},
  pages      = {446--457},
  year       = {2020},
  month      = jan,
  day        = {27},
  doi        = {10.1145/3351095.3372843},
  language   = {English},
  keywords   = {Bias, Detection, Diffusion, Fairness, Homophobia, Mitigation, Racism, Sexism, Word embeddings},
  abstract   = {Word embeddings are a widely used set of natural language processing techniques that map words to vectors of real numbers. These vectors are used to improve the quality of generative and predictive models. Recent studies demonstrate that word embeddings contain and amplify biases present in data, such as stereotypes and prejudice. In this study, we provide a complete overview of bias in word embeddings. We develop a new technique for bias detection for gendered languages and use it to compare bias in embeddings trained on Wikipedia and on political social media data. We investigate bias diffusion and prove that existing biases are transferred to further machine learning models. We test two techniques for bias mitigation and show that the generally proposed methodology for debiasing models at the embeddings level is insufficient. Finally, we employ biased word embeddings and illustrate that they can be used for the detection of similar biases in new data. Given that word embeddings are widely used by commercial companies, we discuss the challenges and required actions towards fair algorithmic implementations and applications.},
  eventtitle = {3rd {ACM} Conference on Fairness, Accountability, and Transparency, {FAT*} 2020},
  eventdate  = {2020-01-27/2020-01-30},
  copyright  = {{\textcopyright} 2020 Copyright held by the owner/author(s).},
}