2022
Mohsen, Fadi; Karastoyanova, Dimka; Azzopardi, George
Early detection of violating Mobile Apps: A data-driven predictive model approach Journal Article
Links | BibTeX | Altmetric | Tags: predictive analysis
@article{Mohsen2022,
  title     = {Early detection of violating Mobile Apps: A data-driven predictive model approach},
  author    = {Fadi Mohsen and Dimka Karastoyanova and George Azzopardi},
  doi       = {10.1016/j.sasc.2022.200045},
  year      = {2022},
  date      = {2022-12-01},
  urldate   = {2022-12-01},
  journal   = {Systems and Soft Computing},
  volume    = {4},
  pages     = {200045},
  keywords  = {predictive analysis},
  pubstate  = {published},
  tppubtype = {article}
}
Alsahaf, Ahmad; Gheorghe, Radu; Hidalgo, André; Petkov, Nicolai; Azzopardi, George
Pre-insemination prediction of dystocia in dairy cattle Journal Article
Links | BibTeX | Altmetric | Tags: predictive analysis, smart farming
@article{Alsahaf2022,
  title     = {Pre-insemination prediction of dystocia in dairy cattle},
  author    = {Ahmad Alsahaf and Radu Gheorghe and Andr{\'e} Hidalgo and Nicolai Petkov and George Azzopardi},
  doi       = {10.1016/j.prevetmed.2022.105812},
  year      = {2022},
  date      = {2022-12-01},
  urldate   = {2022-12-01},
  journal   = {Preventive Veterinary Medicine},
  volume    = {210},
  pages     = {105812},
  keywords  = {predictive analysis, smart farming},
  pubstate  = {published},
  tppubtype = {article}
}
Alsahaf, Ahmad; Petkov, Nicolai; Shenoy, Vikram; Azzopardi, George
A framework for feature selection through boosting Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: feature ranking, feature selection, predictive analysis
@article{Alsahaf2022c,
  title     = {A framework for feature selection through boosting},
  author    = {Ahmad Alsahaf and Nicolai Petkov and Vikram Shenoy and George Azzopardi},
  doi       = {10.1016/j.eswa.2021.115895},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  journal   = {Expert Systems with Applications},
  volume    = {187},
  pages     = {115895},
  publisher = {Pergamon},
  abstract  = {As dimensions of datasets in predictive modelling continue to grow, feature selection becomes increasingly practical. Datasets with complex feature interactions and high levels of redundancy still present a challenge to existing feature selection methods. We propose a novel framework for feature selection that relies on boosting, or sample re-weighting, to select sets of informative features in classification problems. The method uses as its basis the feature rankings derived from fast and scalable tree-boosting models, such as XGBoost. We compare the proposed method to standard feature selection algorithms on 9 benchmark datasets. We show that the proposed approach reaches higher accuracies with fewer features on most of the tested datasets, and that the selected features have lower redundancy.},
  keywords  = {feature ranking, feature selection, predictive analysis},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Lövdal, S. Sofie; Den Hartigh, Ruud J. R.; Azzopardi, George
Injury Prediction in Competitive Runners With Machine Learning Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: predictive analysis, sport science, wearables
@article{injury2021b,
  title     = {Injury Prediction in Competitive Runners With Machine Learning},
  author    = {L{\"o}vdal, S. Sofie and Den Hartigh, Ruud J. R. and Azzopardi, George},
  doi       = {10.1123/ijspp.2020-0518},
  year      = {2021},
  date      = {2021-04-29},
  urldate   = {2021-04-29},
  journal   = {International Journal of Sports Physiology and Performance},
  volume    = {16},
  number    = {10},
  pages     = {1522--1531},
  abstract  = {Purpose: Staying injury free is a major factor for success in sports. Although injuries are difficult to forecast, novel technologies and data-science applications could provide important insights. Our purpose was to use machine learning for the prediction of injuries in runners, based on detailed training logs. Methods: Prediction of injuries was evaluated on a new data set of 74 high-level middle- and long-distance runners, over a period of 7 years. Two analytic approaches were applied. First, the training load from the previous 7 days was expressed as a time series, with each day’s training being described by 10 features. These features were a combination of objective data from a global positioning system watch (eg, duration, distance), together with subjective data about the exertion and success of the training. Second, a training week was summarized by 22 aggregate features, and a time window of 3 weeks before the injury was considered. Results: A predictive system based on bagged XGBoost machine-learning models resulted in receiver operating characteristic curves with average areas under the curves of 0.724 and 0.678 for the day and week approaches, respectively. The results of the day approach especially reflect a reasonably high probability that our system makes correct injury predictions. Conclusions: Our machine-learning-based approach predicts a sizable portion of the injuries, in particular when the model is based on training-load data in the days preceding an injury. Overall, these results demonstrate the possible merits of using machine learning to predict injuries and tailor training programs for athletes.},
  keywords  = {predictive analysis, sport science, wearables},
  pubstate  = {published},
  tppubtype = {article}
}
2020
Farrugia, Steven; Ellul, Joshua; Azzopardi, George
Detection of illicit accounts over the Ethereum blockchain Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: blockchain, fraud detection, predictive analysis
@article{farrugia2020detection,
  title     = {Detection of illicit accounts over the {Ethereum} blockchain},
  author    = {Steven Farrugia and Joshua Ellul and George Azzopardi},
  doi       = {10.1016/j.eswa.2020.113318},
  year      = {2020},
  date      = {2020-01-01},
  urldate   = {2020-01-01},
  journal   = {Expert Systems with Applications},
  volume    = {150},
  pages     = {113318},
  publisher = {Pergamon},
  abstract  = {The recent technological advent of cryptocurrencies and their respective benefits have been shrouded with a number of illegal activities operating over the network such as money laundering, bribery, phishing, fraud, among others. In this work we focus on the Ethereum network, which has seen over 400 million transactions since its inception. Using 2179 accounts flagged by the Ethereum community for their illegal activity coupled with 2502 normal accounts, we seek to detect illicit accounts based on their transaction history using the XGBoost classifier. Using 10 fold cross-validation, XGBoost achieved an average accuracy of 0.963 ( ± 0.006) with an average AUC of 0.994 ( ± 0.0007). The top three features with the largest impact on the final model output were established to be ‘Time diff between first and last (Mins)’, ‘Total Ether balance’ and ‘Min value received’. Based on the results we conclude that the proposed approach is highly effective in detecting illicit accounts over the Ethereum network. Our contribution is multi-faceted; firstly, we propose an effective method to detect illicit accounts over the Ethereum network; secondly, we provide insights about the most important features; and thirdly, we publish the compiled data set as a benchmark for future related works.},
  keywords  = {blockchain, fraud detection, predictive analysis},
  pubstate  = {published},
  tppubtype = {article}
}
van der Heide, E. M. M.; Kamphuis, C.; Veerkamp, R. F.; Athanasiadis, I. N.; Azzopardi, G.; van Pelt, M. L.; Ducro, B. J.
Improving predictive performance on survival in dairy cattle using an ensemble learning approach Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: ensemble learning, predictive analysis, smart farming
@article{van2020improving,
  title     = {Improving predictive performance on survival in dairy cattle using an ensemble learning approach},
  author    = {van der Heide, E. M. M. and Kamphuis, C. and Veerkamp, R. F. and Athanasiadis, I. N. and Azzopardi, G. and van Pelt, M. L. and Ducro, B. J.},
  doi       = {10.1016/j.compag.2020.105675},
  year      = {2020},
  date      = {2020-01-01},
  urldate   = {2020-01-01},
  journal   = {Computers and Electronics in Agriculture},
  volume    = {177},
  pages     = {105675},
  publisher = {Elsevier},
  abstract  = {Cow survival is a complex trait that combines traits like milk production, fertility, health and environmental factors such as farm management. This complexity makes survival difficult to predict accurately. This is probably the reason why few studies attempted to address this problem and no studies are published that use ensemble methods for this purpose. We explored if we could improve prediction of cow survival to second lactation, when predicted at five different moments in a cow’s life, by combining the predictions of multiple (weak) methods in an ensemble method. We tested four ensemble methods: majority voting rule, multiple logistic regression, random forest and naive Bayes. Precision, recall, balanced accuracy, area under the curve (AUC) and gains in proportion of surviving cows in a scenario where the best 50% were selected were used to evaluate the ensemble model performance. We also calculated correlations between the ensemble models and obtained McNemar’s test statistics. We compared the performance of the ensemble methods against those of the individual methods. We also tested if there was a difference in performance metrics when continuous (from 0 to 1) and binary (0 or 1) prediction outcomes were used. In general, using continuous prediction output resulted in higher performance metrics than binary ones. AUCs for models ranged from 0.561 to 0.731, with generally increasing performance at moments later in life. Precision, AUC and balanced accuracy values improved significantly for the naive Bayes and multiple logistic regression ensembles in at least one data set, although performance metrics did remain low overall. The multiple logistic regression ensemble method resulted in equal or better precision, AUC, balanced accuracy and proportion of animals surviving on all datasets and was significantly different from the other ensembles in three out of five moments. The random forest ensemble method resulted in the least significant improvement over the individual methods.},
  keywords  = {ensemble learning, predictive analysis, smart farming},
  pubstate  = {published},
  tppubtype = {article}
}
Wang, Xueyi; Ellul, Joshua; Azzopardi, George
Elderly fall detection systems: A literature survey Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: convnets, deep learning, egocentric vision, fall detection, predictive analysis, wearables
@article{wang2020elderly,
  title     = {Elderly fall detection systems: A literature survey},
  author    = {Xueyi Wang and Joshua Ellul and George Azzopardi},
  doi       = {10.3389/frobt.2020.00071},
  year      = {2020},
  date      = {2020-01-01},
  urldate   = {2020-01-01},
  journal   = {Frontiers in Robotics and AI},
  volume    = {7},
  pages     = {71},
  publisher = {Frontiers},
  abstract  = {Falling is among the most damaging event elderly people may experience. With the ever-growing aging population, there is an urgent need for the development of fall detection systems. Thanks to the rapid development of sensor networks and the Internet of Things (IoT), human-computer interaction using sensor fusion has been regarded as an effective method to address the problem of fall detection. In this paper, we provide a literature survey of work conducted on elderly fall detection using sensor networks and IoT. Although there are various existing studies which focus on the fall detection with individual sensors, such as wearable ones and depth cameras, the performance of these systems are still not satisfying as they suffer mostly from high false alarms. Literature shows that fusing the signals of different sensors could result in higher accuracy and lower false alarms, while improving the robustness of such systems. We approach this survey from different perspectives, including data collection, data transmission, sensor fusion, data analysis, security, and privacy. We also review the benchmark data sets available that have been used to quantify the performance of the proposed methods. The survey is meant to provide researchers in the field of elderly fall detection using sensor networks with a summary of progress achieved up to date and to identify areas where further effort would be beneficial.},
  keywords  = {convnets, deep learning, egocentric vision, fall detection, predictive analysis, wearables},
  pubstate  = {published},
  tppubtype = {article}
}
2019
Alsahaf, Ahmad; Azzopardi, George; Ducro, Bart; Hanenberg, Egiel; Veerkamp, Roel F; Petkov, Nicolai
Estimation of Muscle Scores of Live Pigs Using a Kinect Camera Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: image processing, pattern recognition, predictive analysis, smart farming
@article{alsahaf2019estimation,
  title     = {Estimation of Muscle Scores of Live Pigs Using a {Kinect} Camera},
  author    = {Ahmad Alsahaf and George Azzopardi and Bart Ducro and Egiel Hanenberg and Roel F. Veerkamp and Nicolai Petkov},
  doi       = {10.1109/ACCESS.2019.2910986},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  journal   = {IEEE Access},
  volume    = {7},
  pages     = {52238--52245},
  publisher = {IEEE},
  abstract  = {The muscle grading of livestock is a primary component of valuation in the meat industry. In pigs, the muscularity of a live animal is traditionally estimated by visual and tactile inspection from an experienced assessor. In addition to being a time-consuming process, scoring of this kind suffers from inconsistencies inherent to the subjectivity of human assessment. On the other hand, accurate, computer-driven methods for carcass composition estimation, such as magnetic resonance imaging (MRI) and computed tomography scans (CT-scans), are expensive and cumbersome to both the animals and their handlers. In this paper, we propose a method that is fast, inexpensive, and non-invasive for estimating the muscularity of live pigs, using RGB-D computer vision and machine learning. We used morphological features extracted from the depth images of pigs to train a classifier that estimates the muscle scores that are likely to be given by a human assessor. The depth images were obtained from a Kinect v1 camera which was placed over an aisle through which the pigs passed freely. The data came from 3246 pigs, each having 20 depth images, and a muscle score from 1 to 7 (reduced later to 5 scores) assigned by an experienced assessor. The classification based on morphological features of the pig's body shape-using a gradient boosted classifier-resulted in a mean absolute error of 0.65 in tenfold cross-validation. Notably, the majority of the errors corresponded to pigs being classified as having muscle scores adjacent to the groundtruth labels given by the assessor. According to the end users of this application, the proposed approach could be used to replace expert assessors at the farm.},
  keywords  = {image processing, pattern recognition, predictive analysis, smart farming},
  pubstate  = {published},
  tppubtype = {article}
}
Neocleous, Andreas; Azzopardi, George; Kuitems, Margot; Scifo, Andrea; Dee, Michael
Trainable Filters for the Identification of Anomalies in Cosmogenic Isotope Data Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: anomaly detection, brain-inspired, predictive analysis, time-series, trainable filters
@article{neocleous2019trainable,
  title     = {Trainable Filters for the Identification of Anomalies in Cosmogenic Isotope Data},
  author    = {Andreas Neocleous and George Azzopardi and Margot Kuitems and Andrea Scifo and Michael Dee},
  doi       = {10.1109/ACCESS.2019.2900123},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  journal   = {IEEE Access},
  volume    = {7},
  pages     = {24585--24592},
  publisher = {IEEE},
  abstract  = {Extreme bursts of radiation from space result in rapid increases in the concentration of radiocarbon in the atmosphere. Such rises, known as Miyake Events, can be detected through the measurement of radiocarbon in dendrochronological archives. The identification of Miyake Events is important because radiation impacts of this magnitude pose an existential threat to satellite communications and aeronautical avionics and may even be detrimental to human health. However, at present, radiocarbon measurements on tree-ring archives are generally only available at decadal resolution, which smooths out the effect of a possible radiation burst. The Miyake Events discovered so far, in tree-rings from the years 3372-3371 BCE, 774-775 CE, and 993-994 CE, have essentially been found by chance, but there may be more. In this paper, we use signal processing techniques, in particular COSFIRE, to train filters with data on annual changes in radiocarbon (Δ14C) around those dates. Then, we evaluate the trained filters and attempt to detect similar Miyake Events in the past. The method that we propose is promising, since it identifies the known Miyake Events at a relatively low false positive rate. Using the findings of this paper, we propose a list of 26 calendar years that our system persistently indicates are Miyake Event-like. We are currently examining a short-list of five of the newly identified dates and intend to perform single-year radiocarbon measurements over them. Signal processing techniques, such as COSFIRE filters, can be used as guidance tools since they are able to identify similar patterns of interest, even if they vary in time or in amplitude.},
  keywords  = {anomaly detection, brain-inspired, predictive analysis, time-series, trainable filters},
  pubstate  = {published},
  tppubtype = {article}
}
Neocleous, Andreas; Azzopardi, George; Dee, Michael
Identification of possible Δ14C anomalies since 14 ka BP: A computational intelligence approach Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: anomaly detection, brain-inspired, predictive analysis, time-series, trainable filters
@article{neocleous2019identification,
  title     = {Identification of possible {$\Delta^{14}$C} anomalies since 14 ka {BP}: A computational intelligence approach},
  author    = {Andreas Neocleous and George Azzopardi and Michael Dee},
  doi       = {10.1016/j.scitotenv.2019.01.251},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  journal   = {Science of The Total Environment},
  volume    = {663},
  pages     = {162--169},
  publisher = {Elsevier},
  abstract  = {Rapid increments in the concentration of the radiocarbon in the atmosphere (Δ14C) have been identified in the years 774-775 CE and 993-994 CE (Miyake events) using annual measurements on known-age tree-rings. The level of cosmic radiation implied by such increases could cause the failure of satellite telecommunication systems, and thus, there is a need to model and predict them. In this work, we investigated several intelligent computational methods to identify similar events in the past. We apply state-of-the-art pattern matching techniques as well as feature representation, a procedure that typically is used in machine learning and classification. To validate our findings, we used as ground truth the two confirmed Miyake events, and several other dates that have been proposed in the literature. We show that some of the methods used in this study successfully identify most of the ground truth events (~1% false positive rate at 75% true positive rate). Our results show that computational methods can be used to identify comparable patterns of interest and hence potentially uncover sudden increments of Δ14C in the past.},
  keywords  = {anomaly detection, brain-inspired, predictive analysis, time-series, trainable filters},
  pubstate  = {published},
  tppubtype = {article}
}
2018
Alsahaf, Ahmad; Azzopardi, George; Ducro, Bart; Hanenberg, Egiel; Veerkamp, Roel F; Petkov, Nicolai
Prediction of slaughter age in pigs and assessment of the predictive value of phenotypic and genetic information using random forest Journal Article
Abstract | Links | BibTeX | Altmetric | Tags: predictive analysis, smart farming
@article{alsahaf2018prediction,
  title     = {Prediction of slaughter age in pigs and assessment of the predictive value of phenotypic and genetic information using random forest},
  author    = {Ahmad Alsahaf and George Azzopardi and Bart Ducro and Egiel Hanenberg and Roel F. Veerkamp and Nicolai Petkov},
  doi       = {10.1093/jas/sky359},
  year      = {2018},
  date      = {2018-01-01},
  urldate   = {2018-01-01},
  journal   = {Journal of Animal Science},
  volume    = {96},
  number    = {12},
  pages     = {4935--4943},
  publisher = {Oxford University Press US},
  abstract  = {The weight of a pig and the rate of its growth are key elements in pig production. In particular, predicting future growth is extremely useful, since it can help in determining feed costs, pen space requirements, and the age at which a pig reaches a desired slaughter weight. However, making these predictions is challenging, due to the natural variation in how individual pigs grow, and the different causes of this variation. In this paper, we used machine learning, namely random forest (RF) regression, for predicting the age at which the slaughter weight of 120 kg is reached. Additionally, we used the variable importance score from RF to quantify the importance of different types of input data for that prediction. Data of 32,979 purebred Large White pigs were provided by Topigs Norsvin, consisting of phenotypic data, estimated breeding values (EBVs), along with pedigree and pedigree-genetic relationships. Moreover, we presented a 2-step data reduction procedure, based on random projections (RPs) and principal component analysis (PCA), to extract features from the pedigree and genetic similarity matrices for use as inputs in the prediction models. Our results showed that relevant phenotypic features were the most effective in predicting the output (age at 120 kg), explaining approximately 62% of its variance (i.e., R2 = 0.62). Estimated breeding value, pedigree, or pedigree-genetic features interchangeably explain 2% of additional variance when added to the phenotypic features, while explaining, respectively, 38%, 39%, and 34% of the variance when used separately.},
  keywords  = {predictive analysis, smart farming},
  pubstate  = {published},
  tppubtype = {article}
}