The Anomaly Detection handler implements supervised, semi-supervised, and unsupervised anomaly detection algorithms using the pyod, catboost, xgboost, and sklearn libraries. The models were chosen based on the results in the following benchmark paper:
https://www.andrew.cmu.edu/user/yuezhao2/papers/22-neurips-adbench.pdf
If there is no labelled data, we use an unsupervised learner with the syntax CREATE ANOMALY DETECTION MODEL <model_name>, without specifying a target to predict. MindsDB then adds a column called outlier when generating results.
If we have labelled data, we use the regular model creation syntax. There is backend logic that chooses between a semi-supervised algorithm (currently XGBOD) vs. a supervised algorithm (currently CatBoost).
If multiple models are provided, we create an ensemble and use majority voting to combine their predictions.
-- Example: unsupervised anomaly detection.
-- No PREDICT target is given, so an unsupervised learner is trained and
-- MindsDB adds an `outlier` column to the results.
CREATE ANOMALY DETECTION MODEL mindsdb.unsupervised_ad
FROM files (
    SELECT *
    FROM anomaly_detection
)
USING
    engine = 'anomaly_detection';

-- Inspect the trained model.
DESCRIBE MODEL mindsdb.unsupervised_ad.model;

-- Compare the `class` column of the data with the predicted outlier flag.
SELECT
    t.class,
    m.outlier AS anomaly
FROM files.anomaly_detection AS t
JOIN mindsdb.unsupervised_ad AS m;
-- Example: anomaly detection with labelled data (target column `class`).
-- No `type` is specified, so the handler's backend logic chooses between
-- the semi-supervised and the supervised algorithm.
CREATE MODEL mindsdb.semi_supervised_ad
FROM files (
    SELECT *
    FROM anomaly_detection
)
PREDICT class
USING
    engine = 'anomaly_detection';

-- Inspect the trained model.
DESCRIBE MODEL mindsdb.semi_supervised_ad.model;

-- Show input features and the `class` column next to the predicted class.
SELECT
    t.carat,
    t.category,
    t.class,
    m.class AS anomaly
FROM files.anomaly_detection AS t
JOIN mindsdb.semi_supervised_ad AS m;
-- Example: anomaly detection with labelled data (target column `class`),
-- explicitly requesting the supervised algorithm via type = 'supervised'.
CREATE MODEL mindsdb.supervised_ad
FROM files (
    SELECT *
    FROM anomaly_detection
)
PREDICT class
USING
    engine = 'anomaly_detection',
    type = 'supervised';

-- Inspect the trained model.
DESCRIBE MODEL mindsdb.supervised_ad.model;

-- Show input features and the `class` column next to the predicted class.
SELECT
    t.carat,
    t.category,
    t.class,
    m.class AS anomaly
FROM files.anomaly_detection AS t
JOIN mindsdb.supervised_ad AS m;
-- Example: unsupervised anomaly detection with an explicitly named model.
-- model_name = 'knn' selects the algorithm by name
-- (presumably k-nearest neighbours; confirm against the handler).
CREATE ANOMALY DETECTION MODEL mindsdb.unsupervised_ad_knn
FROM files (
    SELECT *
    FROM anomaly_detection
)
USING
    engine = 'anomaly_detection',
    model_name = 'knn';

-- Inspect the trained model.
DESCRIBE MODEL mindsdb.unsupervised_ad_knn.model;

-- Compare the `class` column of the data with the predicted outlier flag.
SELECT
    t.class,
    m.outlier AS anomaly
FROM files.anomaly_detection AS t
JOIN mindsdb.unsupervised_ad_knn AS m;
-- Example: unsupervised anomaly detection with an explicit anomaly type.
-- anomaly_type = 'local' is passed to the handler
-- (presumably it picks a model suited to local anomalies; confirm against the handler).
CREATE ANOMALY DETECTION MODEL mindsdb.unsupervised_ad_local
FROM files (
    SELECT *
    FROM anomaly_detection
)
USING
    engine = 'anomaly_detection',
    anomaly_type = 'local';

-- Inspect the trained model.
DESCRIBE MODEL mindsdb.unsupervised_ad_local.model;

-- Compare the `class` column of the data with the predicted outlier flag.
SELECT
    t.class,
    m.outlier AS anomaly
FROM files.anomaly_detection AS t
JOIN mindsdb.unsupervised_ad_local AS m;
-- Example: unsupervised ensemble.
-- Several models are listed in ensemble_models; their predictions are
-- combined by majority voting.
CREATE ANOMALY DETECTION MODEL mindsdb.ad_ensemble
FROM files (
    SELECT *
    FROM anomaly_detection
)
USING
    engine = 'anomaly_detection',
    ensemble_models = ['knn', 'ecod', 'lof'];

-- Inspect the trained model.
DESCRIBE MODEL mindsdb.ad_ensemble.model;

-- Compare the `class` column of the data with the ensemble's outlier flag.
SELECT
    t.class,
    m.outlier AS anomaly
FROM files.anomaly_detection AS t
JOIN mindsdb.ad_ensemble AS m;