% Conference paper presenting the TweEvent dataset (ISCRAM 2023 proceedings, pp. 407--416).
% Braces inside `title` protect proper nouns and the dataset name from the
% sentence casing that many bibliography styles apply; author names use the
% unambiguous "Last, First" form so name parsing does not depend on word order.
@inproceedings{cb807f67ca434ef5a0c4caf525ab09f8,
  author    = {Rode-Hasinger, Samyo and H{\"a}berle, Matthias and Racek, Daniel and Kruspe, Anna and Zhu, Xiao Xiang},
  title     = {{TweEvent}: A dataset of {Twitter} messages about events in the {Ukraine} conflict},
  booktitle = {Proceedings - 20th Global Information Systems for Crisis Response and Management Conference, {ISCRAM} 2023},
  series    = {Proceedings of the International {ISCRAM} Conference},
  publisher = {Information Systems for Crisis Response and Management, ISCRAM},
  year      = {2023},
  pages     = {407--416},
  language  = {English},
  keywords  = {Conflict, Dataset, NLP, Social Media, Ukraine},
  abstract  = {Information about incidents within a conflict, e.g., shelling of an area of interest, is scattered amongst different data or media sources. For example, the ACLED dataset continuously documents local incidents recorded within the context of a specific conflict such as Russia{\textquoteright}s war in Ukraine. However, these blocks of information might be incomplete. Therefore, it is useful to collect data from several sources to enrich the information pool of a certain incident. In this paper, we present a dataset of social media messages covering the same war events as those collected in the ACLED dataset. The information is extracted from automatically geocoded Twitter text data using state-of-the-art natural language processing methods based on large pre-trained language models (LMs). Our method can be applied to various textual data sources. Both the data as well as the approach can serve to help human analysts obtain a broader understanding of conflict events.},
  note      = {Publisher Copyright: {\textcopyright} 2023 Information Systems for Crisis Response and Management, ISCRAM. All rights reserved.; 20th Global Information Systems for Crisis Response and Management Conference, ISCRAM 2023 ; Conference date: 28-05-2023 Through 31-05-2023},
}