Reference 1: https://github.com/linkedin/luminol
Reference 2: https://www.kaggle.com/caesarlupum/anomaly-detection-time-series-linkedin-luminol/notebook
import numpy as np   # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc
import warnings
from scipy import stats
from scipy.stats import norm
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
# To plot figures inline on Jupyter
%matplotlib inline
# figure size in inches
rcParams['figure.figsize'] = 14, 6

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls

# pandas display options ('display.' prefix required on recent pandas)
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)
pd.options.display.float_format = '{:.5f}'.format
## DATASET 1 (ent1, ds1)------------------------------------------------------------------------
# ent1: Gaussian noise whose mean steps up at every 1000-sample block,
# with the increment shrinking block by block
mu, sigma = 0.0, 1.0
ent1 = np.zeros(10000)
for i in range(10):
    for j in range(1000):
        ent1[1000*i + j] = np.random.normal(mu, sigma)
    mu = mu + 9 - i
# ds1: AR(2) process driven by ent1
a1 = 0.6
a2 = -0.5
ds1 = np.zeros(10000)
ds1[0] = ent1[0]
ds1[1] = ent1[1]
for i in range(2, 10000):
    ds1[i] = a1*ds1[i-1] + a2*ds1[i-2] + ent1[i]
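The AR(2) recursion above can also be run without an explicit Python loop. A minimal sketch using scipy.signal.lfilter (my own alternative, not in the referenced notebook); note that lfilter starts from zero initial conditions, so the first couple of samples differ slightly from the loop version, which seeds ds1[0] and ds1[1] from ent1 directly:

from scipy.signal import lfilter
# y[i] = a1*y[i-1] + a2*y[i-2] + x[i]  <=>  denominator polynomial [1, -a1, -a2]
ds1_alt = lfilter([1.0], [1.0, -a1, -a2], ent1)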
## DATASET 2 (ent2, ds2)------------------------------------------------------------------------
# ent2: Gaussian noise whose mean steps up by 1 per block and whose
# standard deviation grows toward the end of the series
mu = 0.0
ent2 = np.zeros(10000)
for i in range(10):
    for j in range(1000):
        sigma = 0.1/(0.01 + (10000 - (i*1000 + j))/10000)
        ent2[1000*i + j] = np.random.normal(mu, sigma)
    mu = mu + 1
# ds2: AR(2) process driven by ent2
a1 = 0.6
a2 = -0.5
ds2 = np.zeros(10000)
ds2[0] = ent2[0]
ds2[1] = ent2[1]
for i in range(2, 10000):
    ds2[i] = a1*ds2[i-1] + a2*ds2[i-2] + ent2[i]
## DATASET 3 (ds3) ------------------------------------------------------------------------
# ds3: zero-mean Gaussian noise whose standard deviation alternates
# between 1.0 and 3.0 every 1000-sample block
mu, sigma1, sigma3 = 0.0, 1.0, 3.0
ds3 = np.zeros(10000)
for i in range(10):
    if i in {0, 2, 4, 6, 8}:
        for j in range(1000):
            ds3[1000*i + j] = np.random.normal(mu, sigma1)
    else:
        for j in range(1000):
            ds3[1000*i + j] = np.random.normal(mu, sigma3)
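Each block of ds3 is i.i.d. Gaussian, so the same construction can be drawn one block at a time; a small vectorized sketch of the equivalent draw (my rewrite, not from the source):

ds3_alt = np.concatenate([
    np.random.normal(mu, sigma1 if i % 2 == 0 else sigma3, size=1000)
    for i in range(10)
])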
plt.figure(figsize=(16, 4))
plt.plot(ent1, label='ent1')
plt.title('Dataset 1 innovations (ent1)')
plt.ylabel('Value')
plt.xlabel('Sample index')
plt.legend()
plt.figure(figsize=(16, 4))
plt.plot(ent2, label='ent2')
plt.title('Dataset 2 innovations (ent2)')
plt.ylabel('Value')
plt.xlabel('Sample index')
plt.legend()
plt.figure(figsize=(16, 4))
plt.plot(ds1, label='ds1')
plt.title('Dataset 1 AR(2) series (ds1)')
plt.ylabel('Value')
plt.xlabel('Sample index')
plt.legend()
plt.figure(figsize=(16, 4))
plt.plot(ds2, label='ds2')
plt.title('Dataset 2 AR(2) series (ds2)')
plt.ylabel('Value')
plt.xlabel('Sample index')
plt.legend()
plt.figure(figsize=(16, 4))
plt.plot(ds3, label='ds3')
plt.title('Dataset 3 (ds3)')
plt.ylabel('Value')
plt.xlabel('Sample index')
plt.legend()
plt.show()
# Luminol - requires the 'luminol' package (pip install luminol)
import luminol
from luminol.anomaly_detector import AnomalyDetector
from luminol.correlator import Correlator
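AnomalyDetector accepts a time series either as a path to a CSV file or as a dict mapping timestamps to values, which is why the arrays below are routed through pd.Series(...).to_dict(). A quick sanity check of that conversion (illustrative only):

pd.Series([0.5, 1.2, -0.3]).to_dict()
# -> {0: 0.5, 1: 1.2, 2: -0.3}: the integer positions act as timestamps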
# Data preprocessing for the framework: luminol expects {timestamp: value}
data = np.array(ent1)
ts_s = pd.Series(data)
ts_dict = ts_s.to_dict()
data2 = np.array(ent2)
ts_s2 = pd.Series(data2)
ts_dict2 = ts_s2.to_dict()
detector = AnomalyDetector(ts_dict)
anomalies = detector.get_anomalies()
anomalies
if anomalies:
    time_period = anomalies[0].get_time_window()
    corr = Correlator(ts_dict, ts_dict2, time_period)
    print(corr.get_correlation_result().coefficient)
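Beyond the first anomaly's window, each Anomaly object exposes its own span and severity; a short sketch for listing them (attribute names as documented in the luminol README):

for a in anomalies:
    # window start/end timestamps and the anomaly's score
    print(a.start_timestamp, a.end_timestamp, a.anomaly_score)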
# Return luminol anomaly scores for a 1-D array-like, in time order
def scoreLuminolALLData(series):
    ts_dict = pd.Series(np.array(series)).to_dict()
    detector = AnomalyDetector(ts_dict)
    score = detector.get_all_scores()           # a luminol TimeSeries object
    score_v = []
    for timestamp, value in score.iteritems():  # TimeSeries provides iteritems()
        score_v.append(value)
    return score_v
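If the installed luminol version exposes the TimeSeries internals, the accumulation loop can likely be shortened by reading the result's values attribute directly; this is an assumption about the installed version, so the explicit loop above remains the safe route:

def scoreLuminolALLDataShort(series):
    detector = AnomalyDetector(pd.Series(np.array(series)).to_dict())
    return list(detector.get_all_scores().values)  # assumes TimeSeries.values exists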
dataplot1 = scoreLuminolALLData(ent1)
dataplot2 = scoreLuminolALLData(ent2)
dataplot3 = scoreLuminolALLData(ds1)
dataplot4 = scoreLuminolALLData(ds2)
dataplot5 = scoreLuminolALLData(ds3)
dataplot1
# Summary statistics of the dataset-1 scores
dataLUMINOL_dataset1 = stats.describe(dataplot1)
dataLUMINOL_dataset1
qt25_ds1 = np.percentile(dataplot1, 25)  # Q1, 25th percentile
qt50_ds1 = np.percentile(dataplot1, 50)  # Q2, 50th percentile (median)
qt75_ds1 = np.percentile(dataplot1, 75)  # Q3, 75th percentile
qt25_ds1, qt50_ds1, qt75_ds1
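The plotting helper defined below uses Q3 as its cut, which by construction flags the top 25% of scores; a stricter, standard alternative is Tukey's upper fence, Q3 + 1.5*IQR (my suggestion, not used in the original analysis):

iqr_ds1 = qt75_ds1 - qt25_ds1
tukey_threshold_ds1 = qt75_ds1 + 1.5 * iqr_ds1  # flags only scores far above the bulk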
dfLUMINOL_dataset1 = pd.DataFrame(dataplot1, columns=['Score'])
dfLUMINOL_dataset1.value_counts()
# Shown to examine how the score distribution produced the percentiles above.
# Plot the raw series and its anomaly scores, splitting points at the Q3 threshold
def plot_anomaly_score_low_high(datascore, data):
    datascore_ = pd.DataFrame(datascore, columns=['Score'])
    delta = np.percentile(datascore, 75)  # Q3 as the (generous) threshold
    print('Threshold ', delta)
    plt.figure(figsize=(16, 6))
    plt.plot(data)
    plt.title('Raw data')
    plt.figure(figsize=(16, 6))
    plt.plot(datascore)
    plt.title('Anomaly score')
    plt.figure(figsize=(16, 6))
    df_low_score_ = datascore_[datascore_.Score <= delta]
    df_high_score_ = datascore_[datascore_.Score > delta]
    plt.plot(datascore_.index, datascore_.Score, c='gray', alpha=0.4)
    plt.scatter(df_low_score_.index, df_low_score_.Score, label='Inlier', s=10)
    plt.scatter(df_high_score_.index, df_high_score_.Score, label='Outlier', c='red', s=10)
    plt.margins(x=0.01, y=0.2)
    plt.title('Anomaly Score')
    plt.ylabel('Score')
    plt.xlabel('Data Count')
    plt.legend()
    plt.show()
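Dataset 1 was summarized above but never passed to the helper; for completeness it can be plotted the same way (my addition):

plot_anomaly_score_low_high(dfLUMINOL_dataset1, ent1)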
dataLUMINOL_dataset2 = stats.describe(dataplot2)
dataLUMINOL_dataset2
qt25_ds2 = np.percentile(dataplot2, 25)  # Q1
qt50_ds2 = np.percentile(dataplot2, 50)  # Q2
qt75_ds2 = np.percentile(dataplot2, 75)  # Q3
qt25_ds2, qt50_ds2, qt75_ds2
dfLUMINOL_dataset2 = pd.DataFrame(dataplot2, columns=['Score'])
plot_anomaly_score_low_high(dfLUMINOL_dataset2, ent2)
dataLUMINOL_dataset4 = stats.describe(dataplot4)
dataLUMINOL_dataset4
qt25_ds4 = np.percentile(dataplot4, 25)  # Q1
qt50_ds4 = np.percentile(dataplot4, 50)  # Q2
qt75_ds4 = np.percentile(dataplot4, 75)  # Q3
qt25_ds4, qt50_ds4, qt75_ds4
dfLUMINOL_dataset4 = pd.DataFrame(dataplot4, columns=['Score'])
plot_anomaly_score_low_high(dfLUMINOL_dataset4, ds2)