|
import argparse |
|
import os |
|
import numpy as np |
|
|
|
|
|
def str2bool(v):
    """Convert a command-line token to a boolean.

    Intended for use as an argparse ``type=`` callable.

    Args:
        v: The raw value. Normally a string coming from the command line.
            A real ``bool`` is returned unchanged, and any other object is
            converted with ``str()`` first, so the function no longer raises
            ``AttributeError`` on non-string input.

    Returns:
        True when ``v`` (ignoring case and surrounding whitespace) is one of
        ``'true'``, ``'1'``, ``'t'``, ``'yes'`` or ``'y'``; False otherwise.
    """
    if isinstance(v, bool):
        return v
    return str(v).strip().lower() in ('true', '1', 't', 'yes', 'y')
|
|
|
|
|
# Single module-level parser that every option group below is attached to.
parser = argparse.ArgumentParser()

# Groups created through add_argument_group() are collected here, in creation order.
arg_lists = []
|
|
|
|
|
def add_argument_group(name):
    """Create a titled argument group on the shared parser and record it.

    Args:
        name: Title passed to ``parser.add_argument_group``.

    Returns:
        The newly created argument group. The same object is also appended
        to the module-level ``arg_lists``.
    """
    group = parser.add_argument_group(name)
    arg_lists.append(group)
    return group
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# 'BeerGame' option group: game length, demand, action space, agents, costs
# and DNN-input switches.
#
# Notes:
# * Boolean options use str2bool. Where the default is written as a string
#   ('True' / 'False'), argparse runs it through the `type` callable, so the
#   parsed default is still a real bool.
# * List-valued options take whitespace-separated values, e.g.
#   `--eta 0 4 4 4`. They previously used `type=list`, which turns a single
#   command-line string into a list of its characters.
# * actionList, actionListLen, actionListOpt, actionListLenOpt and stateDim
#   are recomputed by update_config().
# ---------------------------------------------------------------------------
game_arg = add_argument_group('BeerGame')
game_arg.add_argument('--task', type=str, default='bg')
game_arg.add_argument(
    '--fixedAction',
    type=str2bool,
    default='False',
    help='if you want to have actions in [0,actionMax] set it to True. with False it will set it [actionLow, actionUp]'
)
game_arg.add_argument(
    '--observation_data',
    type=str2bool,
    default=False,
    help='if it is True, then it uses the data that is generated by based on few real world observation'
)
game_arg.add_argument('--data_id', type=int, default=22, help='the default item id for the basket dataset')
game_arg.add_argument('--TLow', type=int, default=100, help='duration of one GAME (lower bound)')
game_arg.add_argument('--TUp', type=int, default=100, help='duration of one GAME (upper bound)')
game_arg.add_argument(
    '--demandDistribution',
    type=int,
    default=0,
    help='0=uniform, 1=normal distribution, 2=the sequence of 4,4,4,4,8,..., 3= basket data, 4= forecast data'
)
game_arg.add_argument(
    '--scaled', type=str2bool, default=False, help='if true it uses the (if) existing scaled parameters'
)
game_arg.add_argument('--demandSize', type=int, default=6100, help='the size of demand dataset')
game_arg.add_argument('--demandLow', type=int, default=0, help='the lower bound of random demand')
game_arg.add_argument('--demandUp', type=int, default=3, help='the upper bound of random demand')
game_arg.add_argument('--demandMu', type=float, default=10, help='the mu of the normal distribution for demand ')
game_arg.add_argument('--demandSigma', type=float, default=2, help='the sigma of the normal distribution for demand ')

# Action space. buildActionList() uses actionMax when fixedAction is True and
# [actionLow, actionUp] when it is False.
game_arg.add_argument('--actionMax', type=int, default=2, help='it works when fixedAction is True')
game_arg.add_argument(
    '--actionUp', type=int, default=2, help='bounds on my decision (upper bound), it works when fixedAction is False'
)
game_arg.add_argument(
    '--actionLow', type=int, default=-2, help='bounds on my decision (lower bound), it works when fixedAction is False'
)
game_arg.add_argument(
    '--action_step', type=int, default=1, help='The obtained action value by dnn is multiplied by this value'
)
game_arg.add_argument('--actionList', type=int, nargs='*', default=[], help='The list of the available actions')
game_arg.add_argument('--actionListLen', type=int, default=0, help='the length of the action list')
# NOTE: declared as an int here, but update_config() replaces it with a list.
game_arg.add_argument(
    '--actionListOpt', type=int, default=0, help='the action list which is used in optimal and sterman'
)
game_arg.add_argument('--actionListLenOpt', type=int, default=0, help='the length of the actionlistopt')

# Agents.
game_arg.add_argument(
    '--agentTypes', type=str, nargs='+', default=['dnn', 'dnn', 'dnn', 'dnn'], help='the player types'
)
game_arg.add_argument(
    '--agent_type1', type=str, default='dnn', help='the player types for agent 1, it can be dnn, Strm, bs, rnd'
)
game_arg.add_argument(
    '--agent_type2', type=str, default='dnn', help='the player types for agent 2, it can be dnn, Strm, bs, rnd'
)
game_arg.add_argument(
    '--agent_type3', type=str, default='dnn', help='the player types for agent 3, it can be dnn, Strm, bs, rnd'
)
game_arg.add_argument(
    '--agent_type4', type=str, default='dnn', help='the player types for agent 4, it can be dnn, Strm, bs, rnd'
)
game_arg.add_argument('--NoAgent', type=int, default=4, help='number of agents, currently it should be in {1,2,3,4}')

# Per-player cost coefficients.
game_arg.add_argument('--cp1', type=float, default=2.0, help='shortage cost of player 1')
game_arg.add_argument('--cp2', type=float, default=0.0, help='shortage cost of player 2')
game_arg.add_argument('--cp3', type=float, default=0.0, help='shortage cost of player 3')
game_arg.add_argument('--cp4', type=float, default=0.0, help='shortage cost of player 4')
game_arg.add_argument('--ch1', type=float, default=2.0, help='holding cost of player 1')
game_arg.add_argument('--ch2', type=float, default=2.0, help='holding cost of player 2')
game_arg.add_argument('--ch3', type=float, default=2.0, help='holding cost of player 3')
game_arg.add_argument('--ch4', type=float, default=2.0, help='holding cost of player 4')

# Sterman formula parameters (collected into lists by set_sterman_parameters()).
game_arg.add_argument('--alpha_b1', type=float, default=-0.5, help='alpha of Sterman formula parameter for player 1')
game_arg.add_argument('--alpha_b2', type=float, default=-0.5, help='alpha of Sterman formula parameter for player 2')
game_arg.add_argument('--alpha_b3', type=float, default=-0.5, help='alpha of Sterman formula parameter for player 3')
game_arg.add_argument('--alpha_b4', type=float, default=-0.5, help='alpha of Sterman formula parameter for player 4')
game_arg.add_argument('--betta_b1', type=float, default=-0.2, help='beta of Sterman formula parameter for player 1')
game_arg.add_argument('--betta_b2', type=float, default=-0.2, help='beta of Sterman formula parameter for player 2')
game_arg.add_argument('--betta_b3', type=float, default=-0.2, help='beta of Sterman formula parameter for player 3')
game_arg.add_argument('--betta_b4', type=float, default=-0.2, help='beta of Sterman formula parameter for player 4')

# Reward shaping.
# NOTE(review): element type int is chosen to match the default values;
# confirm whether fractional eta values must be accepted.
game_arg.add_argument('--eta', type=int, nargs='+', default=[0, 4, 4, 4], help='the total cost regulazer')
game_arg.add_argument('--distCoeff', type=int, default=20, help='the total cost regulazer')
game_arg.add_argument(
    '--ifUseTotalReward',
    type=str2bool,
    default='False',
    help='if you want to have the total rewards in the experience replay, set it to true.'
)
game_arg.add_argument(
    '--ifUsedistTotReward',
    type=str2bool,
    default='True',
    help='If use correction to the rewards in the experience replay for all iterations of current game'
)

# DNN input layout (stateDim is recomputed by getStateDim() in update_config()).
game_arg.add_argument(
    '--ifUseASAO',
    type=str2bool,
    default='True',
    help='if use AS and AO, i.e., received shipment and received orders in the input of DNN'
)
game_arg.add_argument('--ifUseActionInD', type=str2bool, default='False', help='if use action in the input of DNN')
game_arg.add_argument(
    '--stateDim', type=int, default=5, help='Number of elements in the state desciptor - Depends on ifUseASAO'
)

# Transfer learning.
game_arg.add_argument('--iftl', type=str2bool, default=False, help='if apply transfer learning')
game_arg.add_argument(
    '--ifTransferFromSmallerActionSpace',
    type=str2bool,
    default=False,
    help='if want to transfer knowledge from a network with different action space size.'
)
game_arg.add_argument(
    '--baseActionSize',
    type=int,
    default=5,
    help='if ifTransferFromSmallerActionSpace is true, this determines the size of action space of saved network'
)
game_arg.add_argument(
    '--tlBaseBrain',
    type=int,
    default=3,
    help='the gameConfig of the base network for re-training with transfer-learning'
)
game_arg.add_argument('--baseDemandDistribution', type=int, default=0, help='same as the demandDistribution')

# Multi-agent training.
game_arg.add_argument(
    '--MultiAgent', type=str2bool, default=False, help='if run multi-agent RL model, not fully operational'
)
game_arg.add_argument(
    '--MultiAgentRun',
    type=str2bool,
    nargs='+',
    default=[True, True, True, True],
    help='In the multi-RL setting, it determines which agent should get training.'
)
game_arg.add_argument(
    '--if_use_AS_t_plus_1', type=str2bool, default='False', help='if use AS[t+1], not AS[t] in the input of DNN'
)
game_arg.add_argument(
    '--ifSinglePathExist',
    type=str2bool,
    default=False,
    help='If true it uses the predefined path in pre_model_dir and does not merge it with demandDistribution.'
)
game_arg.add_argument('--gamma', type=float, default=.99, help='discount factor for reward')
game_arg.add_argument(
    '--multPerdInpt', type=int, default=10, help='Number of history records which we feed into network'
)
|
|
|
|
|
# ---------------------------------------------------------------------------
# 'leadtimes' option group: lead times and initial IL / AO / AS values.
#
# The list-valued options take whitespace-separated integers
# (e.g. `--ILInit 0 0 0 0`). They previously mixed `type=list`, which splits
# a command-line string into characters, and `type=int` with a list default.
# When the config is built through update_config(), these lists are
# overwritten by fill_leadtime_initial_values() from the per-agent scalar
# options (leadRecItem1..4, leadRecOrder1..4, ILInit1..4, AOInit1..4,
# ASInit1..4) declared further down.
# ---------------------------------------------------------------------------
leadtimes_arg = add_argument_group('leadtimes')
leadtimes_arg.add_argument(
    '--leadRecItemLow', type=int, nargs='+', default=[2, 2, 2, 4], help='the min lead time for receiving items'
)
leadtimes_arg.add_argument(
    '--leadRecItemUp', type=int, nargs='+', default=[2, 2, 2, 4], help='the max lead time for receiving items'
)
leadtimes_arg.add_argument(
    '--leadRecOrderLow', type=int, nargs='+', default=[2, 2, 2, 0], help='the min lead time for receiving orders'
)
leadtimes_arg.add_argument(
    '--leadRecOrderUp', type=int, nargs='+', default=[2, 2, 2, 0], help='the max lead time for receiving orders'
)
leadtimes_arg.add_argument('--ILInit', type=int, nargs='+', default=[0, 0, 0, 0], help='')
leadtimes_arg.add_argument('--AOInit', type=int, nargs='+', default=[0, 0, 0, 0], help='')
leadtimes_arg.add_argument(
    '--ASInit', type=int, nargs='+', default=[0, 0, 0, 0], help='the initial shipment of each agent'
)

# Per-agent scalar options; the numeric suffix is the agent index.
leadtimes_arg.add_argument('--leadRecItem1', type=int, default=2, help='the min lead time for receiving items')
leadtimes_arg.add_argument('--leadRecItem2', type=int, default=2, help='the min lead time for receiving items')
leadtimes_arg.add_argument('--leadRecItem3', type=int, default=2, help='the min lead time for receiving items')
leadtimes_arg.add_argument('--leadRecItem4', type=int, default=2, help='the min lead time for receiving items')
leadtimes_arg.add_argument('--leadRecOrder1', type=int, default=2, help='the min lead time for receiving order')
leadtimes_arg.add_argument('--leadRecOrder2', type=int, default=2, help='the min lead time for receiving order')
leadtimes_arg.add_argument('--leadRecOrder3', type=int, default=2, help='the min lead time for receiving order')
leadtimes_arg.add_argument('--leadRecOrder4', type=int, default=2, help='the min lead time for receiving order')
leadtimes_arg.add_argument('--ILInit1', type=int, default=0, help='the initial inventory level of the agent')
leadtimes_arg.add_argument('--ILInit2', type=int, default=0, help='the initial inventory level of the agent')
leadtimes_arg.add_argument('--ILInit3', type=int, default=0, help='the initial inventory level of the agent')
leadtimes_arg.add_argument('--ILInit4', type=int, default=0, help='the initial inventory level of the agent')
leadtimes_arg.add_argument('--AOInit1', type=int, default=0, help='the initial arriving order of the agent')
leadtimes_arg.add_argument('--AOInit2', type=int, default=0, help='the initial arriving order of the agent')
leadtimes_arg.add_argument('--AOInit3', type=int, default=0, help='the initial arriving order of the agent')
leadtimes_arg.add_argument('--AOInit4', type=int, default=0, help='the initial arriving order of the agent')
leadtimes_arg.add_argument('--ASInit1', type=int, default=0, help='the initial arriving shipment of the agent')
leadtimes_arg.add_argument('--ASInit2', type=int, default=0, help='the initial arriving shipment of the agent')
leadtimes_arg.add_argument('--ASInit3', type=int, default=0, help='the initial arriving shipment of the agent')
leadtimes_arg.add_argument('--ASInit4', type=int, default=0, help='the initial arriving shipment of the agent')
|
|
|
|
|
# ---------------------------------------------------------------------------
# 'testing' option group: test cadence, figure/history saving and the
# base-stock policy levels (f1..f4, f_init).
# ---------------------------------------------------------------------------
test_arg = add_argument_group('testing')
test_arg.add_argument(
    '--testRepeatMid',
    type=int,
    default=50,
    help='it is number of episodes which is going to be used for testing in the middle of training'
)
test_arg.add_argument('--testInterval', type=int, default=100, help='every xx games compute "test error"')
test_arg.add_argument(
    '--ifSaveFigure', type=str2bool, default=True, help='if is it True, save the figures in each testing.'
)
test_arg.add_argument(
    '--if_titled_figure',
    type=str2bool,
    default='True',
    help='if is it True, save the figures with details in the title.'
)
test_arg.add_argument(
    '--ifsaveHistInterval', type=str2bool, default=False, help='if every xx games save details of the episode'
)
test_arg.add_argument('--saveHistInterval', type=int, default=50000, help='every xx games save details of the play')
test_arg.add_argument('--Ttest', type=int, default=100, help='it defines the number of periods in the test cases')
test_arg.add_argument(
    '--ifOptimalSolExist',
    type=str2bool,
    default=True,
    help='if the instance has optimal base stock policy, set it to True, otherwise it should be False.'
)

# Base-stock levels per player; set_optimal() may overwrite f1..f4.
test_arg.add_argument('--f1', type=float, default=8, help='base stock policy decision of player 1')
test_arg.add_argument('--f2', type=float, default=8, help='base stock policy decision of player 2')
test_arg.add_argument('--f3', type=float, default=0, help='base stock policy decision of player 3')
test_arg.add_argument('--f4', type=float, default=0, help='base stock policy decision of player 4')
# Whitespace-separated integers, e.g. `--f_init 32 32 32 24`. The previous
# `type=list` split a command-line string into single characters.
test_arg.add_argument(
    '--f_init',
    type=int,
    nargs='+',
    default=[32, 32, 32, 24],
    help='base stock policy decision for 4 time-steps on the C(4,8) demand distribution'
)
test_arg.add_argument('--use_initial_BS', type=str2bool, default=False, help='If use f_init set it to True')
|
|
|
|
|
# ---------------------------------------------------------------------------
# 'reporting' option group: result holders and the history-saving switch.
# ---------------------------------------------------------------------------
reporting_arg = add_argument_group('reporting')

# The four result holders share the same declaration apart from flag and help.
# NOTE(review): `type=list` turns a command-line string into a list of its
# characters; these look like in-memory result holders rather than real CLI
# inputs -- confirm before relying on passing them on the command line.
for _flag, _help in (
    ('--Rsltdnn', 'the result of dnn play tests will be saved here'),
    ('--RsltRnd', 'the result of random play tests will be saved here'),
    ('--RsltStrm', 'the result of heuristic fomula play tests will be saved here'),
    ('--Rsltbs', 'the result of optimal play tests will be saved here'),
):
    # `default=[]` is evaluated on every iteration, so each option gets its own list.
    reporting_arg.add_argument(_flag, type=list, default=[], help=_help)
del _flag, _help  # keep the loop variables out of the module namespace

reporting_arg.add_argument(
    '--ifSaveHist',
    type=str2bool,
    default='False',
    help=(
        'if it is true, saves history, prediction, and the randBatch in each period, '
        'WARNING: just make it True in small runs, it saves huge amount of files.'
    )
)
|
|
|
|
|
|
|
def buildActionList(config):
    """Return the list of integer actions available to an agent.

    Args:
        config: Object exposing ``fixedAction`` and either ``actionMax``
            (read when ``fixedAction`` is truthy) or ``actionLow`` and
            ``actionUp`` (read otherwise).

    Returns:
        ``[0, ..., actionMax]`` when ``config.fixedAction`` is truthy,
        otherwise ``[actionLow, ..., actionUp]``; both ends inclusive,
        step 1.
    """
    step = 1
    if not config.fixedAction:
        low, high = config.actionLow, config.actionUp
    else:
        low, high = 0, config.actionMax
    # +1 makes the upper bound inclusive.
    return list(range(low, high + 1, step))
|
|
|
|
|
|
|
def getStateDim(config):
    """Compute the number of elements in the state descriptor.

    Args:
        config: Object exposing the flags ``ifUseASAO`` and
            ``ifUseActionInD``.

    Returns:
        5 when ``ifUseASAO`` is truthy, otherwise 3, plus 1 when
        ``ifUseActionInD`` is truthy.
    """
    base_dim = 5 if config.ifUseASAO else 3
    extra_dim = 1 if config.ifUseActionInD else 0
    return base_dim + extra_dim
|
|
|
|
|
def set_optimal(config):
    """Overwrite ``f1``..``f4`` for the one hard-coded known instance.

    The values are set to 8, 8, 0, 0 only when ``demandDistribution`` is 0
    and ``cp1``, ``ch1``, ``ch2``, ``ch3`` and ``ch4`` all equal 2. In every
    other case ``config`` is left untouched.

    Args:
        config: Configuration object, modified in place.
    """
    if config.demandDistribution != 0:
        return

    cost_fields = ('cp1', 'ch1', 'ch2', 'ch3', 'ch4')
    # Generator keeps the checks lazy: stop at the first cost that is not 2.
    if not all(getattr(config, field) == 2 for field in cost_fields):
        return

    config.f1, config.f2 = 8., 8.
    config.f3, config.f4 = 0., 0.
|
|
|
|
|
def get_config():
    """Parse the command line and return the finalized configuration.

    Uses ``parse_known_args`` so unrecognized arguments are returned instead
    of causing an error, then passes the parsed namespace through
    ``update_config``.

    Returns:
        A ``(config, unparsed)`` tuple: the value returned by
        ``update_config`` and the list of arguments the parser did not
        recognize.
    """
    parsed, leftovers = parser.parse_known_args()
    return update_config(parsed), leftovers
|
|
|
|
|
def fill_leadtime_initial_values(config):
    """Build the per-agent list attributes from the scalar ``<name>1..4`` fields.

    Sets ``leadRecItemLow``/``leadRecItemUp`` from ``leadRecItem1..4``,
    ``leadRecOrderLow``/``leadRecOrderUp`` from ``leadRecOrder1..4``, and
    ``ILInit``, ``AOInit``, ``ASInit`` from their numbered counterparts.

    Args:
        config: Configuration object, modified in place.
    """
    def per_agent(prefix):
        # A fresh list per call, so the *Low and *Up attributes never alias.
        return [getattr(config, prefix + str(agent)) for agent in (1, 2, 3, 4)]

    # Lower and upper bounds are both filled from the same scalar options.
    config.leadRecItemLow = per_agent('leadRecItem')
    config.leadRecItemUp = per_agent('leadRecItem')
    config.leadRecOrderLow = per_agent('leadRecOrder')
    config.leadRecOrderUp = per_agent('leadRecOrder')

    config.ILInit = per_agent('ILInit')
    config.AOInit = per_agent('AOInit')
    config.ASInit = per_agent('ASInit')
|
|
|
|
|
def get_auxuliary_leadtime_initial_values(config):
    """Store copies of the per-agent lead times in ``*_aux`` attributes.

    Sets ``leadRecOrderUp_aux`` from ``leadRecOrder1..4`` and
    ``leadRecItemUp_aux`` from ``leadRecItem1..4``, using whatever values
    those fields hold at the time of the call.

    Args:
        config: Configuration object, modified in place.
    """
    agents = (1, 2, 3, 4)
    config.leadRecOrderUp_aux = [getattr(config, 'leadRecOrder' + str(k)) for k in agents]
    config.leadRecItemUp_aux = [getattr(config, 'leadRecItem' + str(k)) for k in agents]
|
|
|
|
|
def fix_lead_time_manufacturer(config):
    """Fold agent 4's order lead time into its item lead time.

    When ``leadRecOrder4`` is positive it is added to ``leadRecItem4`` and
    then reset to 0; otherwise nothing changes. Agent 4 is presumably the
    manufacturer, going by the function name.

    Args:
        config: Configuration object, modified in place.
    """
    order_delay = config.leadRecOrder4
    if not order_delay > 0:
        return
    config.leadRecItem4 += order_delay
    config.leadRecOrder4 = 0
|
|
|
|
|
def set_sterman_parameters(config):
    """Collect the per-player Sterman parameters into two lists.

    Sets ``config.alpha_b`` from ``alpha_b1..4`` and ``config.betta_b`` from
    ``betta_b1..4``.

    Args:
        config: Configuration object, modified in place.
    """
    players = (1, 2, 3, 4)
    config.alpha_b = [getattr(config, 'alpha_b' + str(p)) for p in players]
    config.betta_b = [getattr(config, 'betta_b' + str(p)) for p in players]
|
|
|
|
|
def update_config(config):
    """Fill in every derived field of ``config`` and return it.

    Steps, in order:
      1. Build ``actionList`` / ``actionListLen`` with ``buildActionList``.
      2. Run ``set_optimal`` and collect ``f1..f4`` into ``config.f``.
      3. Build ``actionListOpt`` / ``actionListLenOpt``.
      4. Collect holding and shortage costs into ``c_h`` and ``c_p``.
      5. Recompute ``stateDim`` with ``getStateDim``.
      6. Record the auxiliary lead times, then fold agent 4's order lead
         time into its item lead time, then build the lead-time and
         initial-value lists.
      7. Collect the Sterman parameters.

    Args:
        config: Parsed configuration object, modified in place.

    Returns:
        The same ``config`` object.
    """
    config.actionList = buildActionList(config)
    config.actionListLen = len(config.actionList)

    set_optimal(config)
    config.f = [config.f1, config.f2, config.f3, config.f4]

    # Upper end of the "optimal" action range is the larger of
    # 30 * actionUp + 1 and a multiple of sum(f): 3x for demand
    # distribution 0, 7x for every other distribution.
    f_multiplier = 3 if config.demandDistribution == 0 else 7
    opt_upper = int(max(config.actionUp * 30 + 1, f_multiplier * sum(config.f)))
    config.actionListOpt = list(range(0, opt_upper, 1))
    config.actionListLenOpt = len(config.actionListOpt)

    config.c_h = [config.ch1, config.ch2, config.ch3, config.ch4]
    config.c_p = [config.cp1, config.cp2, config.cp3, config.cp4]

    config.stateDim = getStateDim(config)

    # Order matters: the *_aux lists must be captured before
    # fix_lead_time_manufacturer() mutates leadRecItem4 / leadRecOrder4, and
    # fill_leadtime_initial_values() must run after it.
    get_auxuliary_leadtime_initial_values(config)
    fix_lead_time_manufacturer(config)
    fill_leadtime_initial_values(config)

    set_sterman_parameters(config)

    return config
|
|