% Conference paper in the AusDM 2022 proceedings (Communications in Computer
% and Information Science series).
% Acronyms in the title and booktitle are brace-protected so that styles which
% apply sentence casing keep their capitalisation.
% Names are stored in "Last, First" form so multi-part given names parse
% unambiguously.
% NOTE(review): the exported address was "United States"; it is set to
% Singapore to agree with the copyright holder named in the note field
% (Springer Nature Singapore Pte Ltd.) -- confirm against the publisher record.
% NOTE(review): the series volume number is not recorded in this entry; add a
% volume field once verified.
@inproceedings{b7efd5b6f0874b389e7d75077cea228b,
  author    = {Sun, Qiang and Liu, Wei and Huynh, Du and Reynolds, Mark},
  title     = {Graph Embeddings for {Non-IID} Data Feature Representation Learning},
  booktitle = {Data Mining - 20th Australasian Conference, {AusDM} 2022, Proceedings},
  editor    = {Park, Laurence A. F. and Simoff, Simeon and Gomes, Heitor Murilo and Doborjeh, Maryam and Boo, Yee Ling and Koh, Yun Sing and Zhao, Yanchang and Williams, Graham},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer Science + Business Media},
  address   = {Singapore},
  year      = {2022},
  pages     = {43--57},
  doi       = {10.1007/978-981-19-8746-5_4},
  isbn      = {9789811987458},
  language  = {English},
  keywords  = {Graph embedding, Knowledge graph, Non-IID},
  abstract  = {Most machine learning models like Random Forest (RF) and Support Vector Machine (SVM) assume that features in the datasets are independent and identically distributed (IID). However, many datasets in the real world contain structural dependencies so neither the data observations nor the features satisfy this IID assumption. In this paper, we propose to incorporate the latent structural information in the data and learn the best embeddings for the downstream classification tasks. Specifically, we build traffic knowledge graphs for a traffic-related dataset and apply node2vec and TransE to learn the graph embeddings, which are then fed into three machine learning algorithms, namely SVM, RF, and kNN to evaluate their performance on various classification tasks. We compare the performance of these three classification models under two different representations of the same dataset: the first representation is based on traffic speed, volume, and speed limit; the second representation is the graph embeddings learned from the traffic knowledge graph. Our experimental results show that the road network information captured in the knowledge graphs is crucial for predicting traffic risk levels. Through our empirical analysis, we demonstrate knowledge graphs can be effectively used to capture the structural information in non-IID datasets.},
  note      = {Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd.; 20th Australasian Data Mining Conference, AusDM 2022 ; Conference date: 12-12-2022 Through 15-12-2022},
}