DM-exp-1/4-2-2.py

13 lines
326 B
Python
Raw Normal View History

# Equal-frequency (quantile-based) discretization
import pandas as pd
import numpy as np
# Sample data to be split into bins that each hold the same number of values.
num = [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215]
k = 4  # number of equal-frequency bins to produce

# Percentile ranks of the k + 1 bin edges: 0, 100/k, ..., 100.
temp = [i / k * 100 for i in range(k + 1)]
print(temp)

# The bin edges are the values of `num` at those percentile ranks.
w = [np.percentile(num, item) for item in temp]

# pd.cut builds left-open, right-closed intervals, so the minimum value would
# fall outside the first bin. include_lowest=True closes the first interval on
# the left; unlike scaling the first edge by (1 - 1e-10), this also works when
# the minimum is zero or negative.
d2 = pd.cut(num, w, labels=range(k), include_lowest=True)
print(d2.value_counts())