풀필먼트 수요예측

YSTONe_ 2023. 10. 3. 17:44

2023. 10. 3. 17:44

풀필먼트 센터는 고객이 많다. 정말 많다.
이 많은 고객이 오늘 주문이 몇개 들어올지 예상하는 것은 굉장히 귀찮은 일이 될 수 있다.
예측 정확도를 높은 수준으로 올리기 어려운 것 같다.

그래서, 무당처럼 이번주 트랜드, 내일의 트랜드는 맞추지 못하더라도 현재 들어온 주문량을 토대로 오늘 총 얼마나 들어올지 예상하는 것을 만들었다(네비게이션처럼 예상 도착시간을 알려주는 느낌)

지금까지 들어온 주문 데이터를 업로드하면, 오늘 총 예상 주문수량 그래프를 만들어주는 WEB

mysql을 사용해보고 싶어서 Databse를 만들었으나, 상황에 따라 XML, TXT, CSV를 사용하는 것도 괜찮을 것 같다(데이터가 많지 않다면)

※ 대시보드 로직
0) 현재까지 주문 인입된 정보 excel 파일을 업로드
1) DB에 축적된 고객 별 시간 대 별 주문인입량을 조회
2) 조회한 시간까지 기준으로 가장 주문인입량이 비슷한 날을 찾음
3) 고객 별로 찾은 값들을 모두 합산함(시간 별로)
4) 그래프로 시간 별 주문 인입량을 표시 함

라이브러리

### <라이브러리 임포트>

from dash import Dash, dcc, html, dash_table
from dash.dependencies import Input, Output, State
import dash_bootstrap_components as dbc
import dash_uploader as du
import plotly.graph_objs as go
import datetime
import pymysql
import pandas as pd
import xlwings as xw
import os
import math
import numpy as np
import time

이상치를 제거하는 함수

def get_outlier(df=None, column=None, weight=1.5):
  # target 값과 상관관계가 높은 열을 우선적으로 진행
    quantile_25 = np.percentile(df[column].values, 25)
    quantile_75 = np.percentile(df[column].values, 75)
    IQR = quantile_75 - quantile_25
    IQR_weight = IQR*weight
    lowest = quantile_25 - IQR_weight
    highest = quantile_75 + IQR_weight
    outlier_idx = df[column][ (df[column] < lowest) | (df[column] > highest) ].index
    return outlier_idx

def delect_out(df):
	# 고객별로 루프돌기 위함
    for i in df.columns:
        out = get_outlier(df = df, column=i,weight=1.5)
        df.drop(out, axis = 0, inplace = True)
    return df

우리 회사 엑셀다운로드 양식에 맞춰서 재료 준비

col = ['고객', '날자', '합계', '00:00~01:00', '01:00~02:00','02:00~03:00', '03:00~04:00', '04:00~05:00', '05:00~06:00','06:00~07:00', '07:00~08:00', '08:00~09:00', '09:00~10:00','10:00~11:00', '11:00~12:00', '12:00~13:00', '13:00~14:00','14:00~15:00', '15:00~16:00', '16:00~17:00', '17:00~18:00','18:00~19:00', '19:00~20:00', '20:00~21:00', '21:00~22:00','22:00~23:00', '23:00~24:00', '요일', '주차', '달', '홀짝']
list_col = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24]
col_time = ['00:00~01:00', '01:00~02:00', '02:00~03:00', '03:00~04:00',
       '04:00~05:00', '05:00~06:00', '06:00~07:00', '07:00~08:00',
       '08:00~09:00', '09:00~10:00', '10:00~11:00', '11:00~12:00',
       '12:00~13:00', '13:00~14:00', '14:00~15:00', '15:00~16:00',
       '16:00~17:00', '17:00~18:00', '18:00~19:00', '19:00~20:00',
       '20:00~21:00', '21:00~22:00', '22:00~23:00', '23:00~24:00']

엑셀파일 형식

가장 비슷한 날을 찾는 함수

def run_data(search_time=9, spread_per=0.05, db_data=None, target_data=None): 


    search_time = search_time
    spread_per = spread_per
    order_tm_df = db_data
    sample_df=target_data
    sample_df.iloc[-1,2] = '전체'
    print(sample_df.iloc[-1,:])

    time_col_list = col_time[:search_time]
    print(time_col_list)
    print('조회 time', col_time[search_time-1])
    predict_seller = pd.DataFrame(columns=['예측량'])
    predict_time_seller = pd.DataFrame()

    for seller in sample_df['고객']:
        
        try :
            ## <고객사 별로 해당 시간대에 물량 gap이 가장 적은 날을 찾음>
            cj_test = abs(order_tm_df.loc[order_tm_df['고객'] == seller, time_col_list].sum(axis =1 ) - sample_df.loc[sample_df['고객']==seller, time_col_list].sum(axis= 1).values[0])
            test = cj_test.sort_values()[:math.ceil(len(cj_test)*spread_per)].index
            raw_df = order_tm_df.loc[:,col_time]
            a = raw_df.iloc[test,:]
            ## </고객사 별로 해당 시간대에 물량 gap이 가장 적은 날을 찾음>



            a = delect_out(a)
            target_val = sample_df.loc[sample_df['고객']==seller,col_time].values[0].sum()
            predict_val = a.sum(axis=1).mean()
            predict_seller.loc[seller, :] = predict_val
            predict_now = (a.loc[:,time_col_list]).sum(axis=1).mean()
            predict_time_seller[seller] = a.mean()
            print(seller,'모수',len(cj_test),'대상 : ', len(test),'예측값',predict_val,'측정 시간까지 예측:',predict_now,'/ 실값', target_val,'정확도:',((target_val-abs(target_val-predict_now))/target_val)*100,'%')
        except:
            print('아직 데이터가 없습니다.')
            predict_seller.loc[seller, :] = '데이터 부족'
            pass
    return predict_time_seller[predict_time_seller.columns.difference(['전체'])].sum(axis=1)

엑셀 업로드한 파일을 찾는 함수

def first_data(path_dir):
    files_Path = path_dir
    file_name_and_time_lst = []
    for f_name in os.listdir(f"{files_Path}"):
        written_time = os.path.getctime(f"{files_Path}{f_name}")
        file_name_and_time_lst.append((f_name, written_time))

    sorted_file_lst = sorted(file_name_and_time_lst, key=lambda x: x[1], reverse=True)

    recent_file = sorted_file_lst[0]
    stock_path = recent_file[0]

    return path_dir + stock_path

엑셀파일을 조회하는 함수(DRM때문에 바로 접근하는 것은 안되고 엑셀을 한번 물리적으로 켜는 행위)

def open_xls(csv_file):
    app = xw.App(visible=False)
    open_file = xw.Book(csv_file)
    file_to_df = open_file.sheets(1).used_range.options(pd.DataFrame).value.reset_index()
    app.kill()
    return file_to_df

WEB 화면(PYTHON PLOTLY DASH)

#### <화면>


external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = Dash(__name__, external_stylesheets =[dbc.themes.BOOTSTRAP])


du.configure_upload(app, "C:\\Users\\cjl\\Desktop\\dev\\time_predcit\\upload")


page_size = 20

app.layout = html.Div(style={'backgroundColor': "#F9F7F7"}, 
 children=[
    
    html.H4(children ="주문인입 TREND 예측 CHART",
           style = {
               "background" : '#112D4E',
               'padding': '30px 30px',
               "color" : '#FFFFFF',
               "font-weight": "bold"
           }),
    html.Br(),
    html.Div(children = '오차 범위 설정(%)',
            style = {
                "font-weight" : "bold"
            }),
    dcc.Input(
        id='my-input',
        value='0.05',
        type = 'text'
    ),
    du.Upload(text = '시간별주문실적 엑셀 업로드',
            text_completed = '업로드 완료 :'),
    
    html.Button(id='submit-button-state', n_clicks=0, children='조회',
               style = {
                   'padding' : '8px 20px',
                   'background' : '#112D4E',
                   'color' : 'white'
               }),
    html.Br(),
    html.Br(),
    html.Div(children = '결과 조회',
            style = {
                'font-weight' : 'bold'
            }),

    dcc.Graph(id='my-graph'),
    html.Div(id='output-total')
     
    
])

화면 연동

@app.callback(Output('my-graph', 'figure'),Output('output-total','children'),
              Input('submit-button-state', 'n_clicks'),
              State('my-input', 'value'),
              prevent_initial_call = True,
             )
def show_fig(n_clicks, input1):
    print('오차범위:',input1)
    print('파일경로:',first_data(first_data('MY-DIR-PATH')+"\\"))
    os.system("python ./importexcel.py")
    print('대기완료')
    upload_data = pd.read_excel('MY-DIR-PATH + IMPORT EXCEL FILE NAME')
    conn = pymysql.connect(host='127.0.0.1', user='root', password='my-password', db='order_info', charset='utf8')
    cur = conn.cursor()
    qer = "SELECT * FROM time_order"
    cur.execute(qer)
    result = cur.fetchall()
    res = pd.DataFrame(result)
    res.columns = col
    res = res.loc[(res['요일']!=1)|(res['요일']!=2),:]
    db_data = res
    plot_db = run_data(search_time=datetime.datetime.now().hour,spread_per=float(input1),db_data = db_data, target_data=upload_data)
    print(plot_db)
    fig = go.Figure(data=[go.Scatter(x=plot_db.index.values, y=plot_db.values, mode = "lines+markers")])
    total_val = '당일 TOTAL 물량 누계 :{}'.format(plot_db.values.sum())

    return fig, total_val

앱실행

### <앱실행>
if __name__ == '__main__':
    app.run_server(debug=False,host = '0.0.0.0', port = '8070')

저작자표시 (새창열림)

YSTONe

풀필먼트 수요예측

+ Recent posts

티스토리툴바