-
Notifications
You must be signed in to change notification settings - Fork 0
/
outlier_detection.py
30 lines (23 loc) · 901 Bytes
/
outlier_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 21 12:32:11 2020
@author: dipanshu
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
INPUT_CSV = 'data/data_w_mean_std.csv'
OUTPUT_CSV = 'data/outlier_data.csv'


def find_outliers(data):
    """Return the rows of *data* whose ``mv`` falls outside its spec limits.

    Rows are grouped by ``parameterid``. For each group the upper and lower
    limits are the group-wide means of the ``usl`` and ``lsl`` columns, and a
    row is kept when ``mv > usl`` or ``mv < lsl`` (values equal to a limit
    are not outliers).

    Args:
        data: DataFrame with at least the columns ``parameterid``, ``mv``,
            ``usl`` and ``lsl``. Other columns are carried through untouched.

    Returns:
        A new DataFrame with the same columns as *data*, a fresh 0..n-1
        index, and rows ordered by ``parameterid`` group (sorted keys) and
        then by their original order within the group. Empty if no row is
        out of limits.
    """
    # Collect the per-group slices and concatenate once at the end: calling
    # pd.concat inside the loop re-copies everything gathered so far on each
    # iteration, and seeding it with an empty object-dtype frame degrades the
    # column dtypes.
    outlier_frames = []
    for _, group in data.groupby('parameterid'):
        usl = group['usl'].mean()
        lsl = group['lsl'].mean()
        out_of_limits = (group['mv'] > usl) | (group['mv'] < lsl)
        outlier_frames.append(group.loc[out_of_limits])

    if not outlier_frames:
        # pd.concat raises on an empty list; keep the header-only result.
        return pd.DataFrame(columns=data.columns)
    return pd.concat(outlier_frames, ignore_index=True)


def main():
    """Read the input CSV, extract the out-of-limit rows and write them out."""
    data = pd.read_csv(INPUT_CSV)
    find_outliers(data).to_csv(OUTPUT_CSV, index=False)


if __name__ == '__main__':
    main()