diff --git a/__pycache__/config.cpython-37.pyc b/__pycache__/config.cpython-37.pyc
deleted file mode 100644
index e8d1d36..0000000
Binary files a/__pycache__/config.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/config.cpython-38.pyc b/__pycache__/config.cpython-38.pyc
index 8ec24f7..f73b463 100644
Binary files a/__pycache__/config.cpython-38.pyc and b/__pycache__/config.cpython-38.pyc differ
diff --git a/neural_network.py b/neural_network.py
index 6ccaff7..d420b0f 100644
--- a/neural_network.py
+++ b/neural_network.py
@@ -1,121 +1,138 @@
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers
+import keras
 import gym
 from matplotlib import pyplot as plt
 from config import infos
-import pickle
 from collections import deque
 import numpy as np
 import random
-from tqdm import tqdm
+import copy
+import sys
+import argparse
+
 
 env = gym.envs.make("CartPole-v1")
 state_size = env.observation_space.shape[0]
 action_size = env.action_space.n
+output_dir = "./weights"
+memory = deque(maxlen=2000)
 
-class DQNAgent:
-
-    def __init__(self, state_size, action_size):
-        self.state_size = state_size
-        self.action_size = action_size
-        self.memory = deque(maxlen=infos.len_memory)
-        self.epsilon = infos.epsilon
-        self.m1 = self.kreate_model()
-        #self.m2 = self.kreate_model()
-        #self.m2.set_weights(self.m1.get_weights())
-
-
-    def kreate_model(self):
-        learning_rate = infos.learning_rate
-        model = keras.Sequential()
-        model.add(keras.layers.Dense(8, input_shape=[self.state_size], activation='relu'))
-        model.add(keras.layers.Dense(16, activation='relu'))
-        model.add(keras.layers.Dense(32, activation='relu'))
-        model.add(keras.layers.Dense(64, activation='relu'))
-        model.add(keras.layers.Dense(self.action_size, activation='linear'))
-        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=learning_rate))
-        return model
+def init_model(state_size, action_size):
+    learning_rate = infos.learning_rate
+    model = keras.Sequential()
+    model.add(keras.layers.Dense(8, input_shape=[state_size], activation='relu'))
+    model.add(keras.layers.Dense(16, activation='relu'))
+    model.add(keras.layers.Dense(32, activation='relu'))
+    # model.add(keras.layers.Dense(64, activation='relu'))
+    model.add(keras.layers.Dense(action_size, activation='linear'))
+    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=learning_rate))
+    return model
 
-    def acting(self, state):
-        if (random.random() < self.epsilon):
-            return random.randrange(self.action_size)
-        action = np.argmax(self.m1.predict(state)[0]) ### change to m2
-        return (action)
-
-    def fitting(self, state, action, reward, new_state, target_qvalues):
-        target = reward + (infos.discount_factor * max(self.m1.predict(new_state)[0])) ### change to m2
-        target_qvalues[0][action] = target
-        self.m1.fit(state, target_qvalues, verbose = 0)
-
-    def evaluate(self):
-        results = []
-        for episode in range(infos.eval_size):
-            state = env.reset()
-            state = np.reshape(state, [1, state_size])
-            steps = 0
-            done = False
-
-            while not done and steps < infos.replay_memory:
-                predicted_qvalues = self.m1.predict(state)
-                action = np.argmax(predicted_qvalues[0])
-                state, _, done, _ = env.step(action)
-                state = np.reshape(state, [1, state_size])
-                steps += 1
-            results.append(steps)
-        return np.mean(results)
-
-    def update_epsilon(self):
-        if self.epsilon > infos.epislon_min:
-            self.epsilon = self.epsilon * infos.epsilon_decay
-        if (self.epsilon <= infos.epislon_min):
-            self.epsilon = infos.epislon_min
+def policy(state, predicted_qvalues, epsilon):
+    if (random.random() < epsilon):
+        action = random.randint(0, 1)
+    else:
+        action = np.argmax(predicted_qvalues)
+    return (action)
 
 def load(name, model):
-    model.load_weights(name)
-
+    model.load_weights(name)
+
 def save(name, model):
-    model.save_weights(name)
+    model.save_weights(name)
+
+def fit_model(state, action, reward, new_state, m1, m2, target_qvalues):
+    target = reward + (infos.discount_factor * max(m2.predict(new_state)[0]))
+    target_qvalues[0][action] = target
+    m1.fit(state, target_qvalues, verbose = 0)
+    return m1, m2
+
+def copy_model(model):
+    model_copy = keras.models.clone_model(model)
+    model_copy.build((None, action_size))
+    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=infos.learning_rate))
+    model_copy.set_weights(model.get_weights())
+    return (model_copy)
+
+def eval(m1):
+    results = []
+    for episode in range(3):
+        state = env.reset()
+        state = np.reshape(state, [1, state_size])
+        steps = 0
+        done = False
+
+        while not done and steps < 200:
+            predicted_qvalues = m1.predict(state)
+            action = np.argmax(predicted_qvalues[0])
+            state, _, done, _ = env.step(action)
+            state = np.reshape(state, [1, state_size])
+            steps += 1
+        results.append(steps)
+    return np.mean(results)
+
+def play(m1):
+    while True:
+        state = env.reset()
+        state = np.reshape(state, [1, state_size])
+        done = False
+        while not done:
+            action = np.argmax(m1.predict(state)[0]) # action = random.randint(0, 1)
+            new_state, reward, done, _ = env.step(action)
+            env.render()
+            state = new_state
+    env.close()
 
 
 def learn():
-    agent = DQNAgent(state_size, action_size)
-    for episode in tqdm(range(infos.episodes)):
-        state = env.reset()
-        state = np.reshape(state, [1, state_size])
-        steps = 0
-        done = False
-
-        while not done:
-            predicted_qvalues = agent.m1.predict(state) ### change to m2
-            action = agent.acting(state)
-            new_state, reward, done, _ = env.step(action)
-            new_state = np.reshape(new_state, [1, state_size])
-            steps += 1
-            if done == True:
-                reward = infos.reward_values[0]
-            agent.memory.append((state, action, reward, new_state, done))
-            agent.fitting(state, action, reward, new_state, predicted_qvalues)
-            state = new_state
-
-        if len(agent.memory) > infos.replay_memory and (random.random() < 0.5):
-            minibatch = random.sample(agent.memory, infos.replay_memory)
-            for state, action, reward, new_state, done in minibatch:
-                predicted_qvalues = agent.m1.predict(state) ### change to m2
-                action = agent.acting(state)
-                agent.fitting(state, action, reward, new_state, predicted_qvalues)
+    epsilon = infos.epsilon
+    m1 = init_model(state_size, action_size)
+    m2 = init_model(state_size, action_size)
+    m2.set_weights(m1.get_weights())
+    for episode in range(infos.episodes):
+        state = env.reset()
+        state = np.reshape(state, [1, state_size])
+        steps = 0
+        done = False
+
+        while not done:
+            predicted_qvalues = m2.predict(state)
+            action = policy(state, predicted_qvalues[0], epsilon)
+            new_state, reward, done, _ = env.step(action)
+            new_state = np.reshape(new_state, [1, state_size])
+            steps += 1
+            if done == True:
+                reward = infos.reward_values[0]
+            memory.append((state, action, reward, new_state, done))
+            m1, m2 = fit_model(state, action, reward, new_state, m1, m2, predicted_qvalues)
+            state = new_state
+
+        if len(memory) > 200 and (random.random() < 0.5):
+            print(f"*** memory replay for episode:{episode}")
+            minibatch = random.sample(memory, infos.batch_size)
+            ### check minibatch
+            for state, action, reward, new_state, done in minibatch:
+                predicted_qvalues = m1.predict(state)
+                action = policy(state, predicted_qvalues[0], epsilon)
+                m1, m2 = fit_model(state, action, reward, new_state, m1, m2, predicted_qvalues)
 
-        agent.update_epsilon()
-        # print(f'\nepisode = {episode}, total_steps = {steps} and epsilon == {round(epsilon, 3)}')
-        # if episode % 10 == 0 and episode != 0:
-        #     print(f"evaluation m1 = {agent.evaluate()}")
-        #     agent.m1.set_weights(agent.m1.get_weights()) ### change to m2
+        epsilon = epsilon * infos.epsilon_decay
+        if (epsilon < infos.epislon_min):
+            epsilon = infos.epislon_min
+
+        print(f'\nepisode = {episode}, total_steps = {steps} and epsilon == {round(epsilon, 3)}')
+        if episode % 10 == 0 and episode != 0:
+            print(f"evaluation m1 = {eval(m1)}")
+            m2.set_weights(m1.get_weights())
 
-        if episode % 50 == 0 and episode != 0:
-            agent.m1.save_weights(f'weigths/with_dqn_{episode}.hdf5', agent.m1)
-
-    return agent.m1
+        if episode % 50 == 0 and episode != 0:
+            save(f'outs/with_dqn_{episode}.hdf5', m1)
+    return m1
 
 
 if __name__ == "__main__":
-    m1 = learn()
\ No newline at end of file
+    m1 = learn()
+    episode = sys.argv[1]
+    m1 = init_model(state_size, action_size)
+    m1.load_weights("weigths/weights_dqn_550e")
+    play(m1)
\ No newline at end of file
diff --git a/neural_network2.py b/neural_network2.py
deleted file mode 100644
index 274aa8a..0000000
--- a/neural_network2.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# import tensorflow as tf
-# from tensorflow import keras
-# from tensorflow.keras import layers
-
-import gym
-from matplotlib import pyplot as plt
-from config import infos
-import pickle
-from collections import deque
-import numpy as np
-import random
-import copy
-import sys
-
-
-env = gym.envs.make("CartPole-v1")
-state_size = env.observation_space.shape[0]
-action_size = env.action_space.n
-print(f"state_size = {state_size}, action size = {action_size}")
-output_dir = "./cartpole/outs"
-memory = deque(maxlen=2000)
-
-def init_model(state_size, action_size):
-    learning_rate = infos.learning_rate
-    model = keras.Sequential()
-    model.add(keras.layers.Dense(8, input_shape=[state_size], activation='relu'))
-    model.add(keras.layers.Dense(16, activation='relu'))
-    model.add(keras.layers.Dense(32, activation='relu'))
-    #model.add(keras.layers.Dense(64, activation='relu'))
-    model.add(keras.layers.Dense(action_size, activation='linear'))
-    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=learning_rate))
-    return model
-
-def policy(state, predicted_qvalues, epsilon):
-    if (random.random() < epsilon):
-        action = random.randint(0, 1)
-    else:
-        action = np.argmax(predicted_qvalues)
-    return (action)
-
-def load(name, model):
-    model.load_weights(name)
-
-def save(name, model):
-    model.save_weights(name)
-
-def fit_model(state, action, reward, new_state, m1, m2, target_qvalues):
-    target = reward + (infos.discount_factor * max(m2.predict(new_state)[0]))
-    target_qvalues[0][action] = target
-    #is .fit() good ?
-    # print(f"state = {state}, action = {action}, reward = {reward}, target_qvalues = {target_qvalues}")
-    m1.fit(state, target_qvalues, verbose = 0)
-    return m1, m2
-
-def copy_model(model):
-    model_copy = keras.models.clone_model(model)
-    model_copy.build((None, action_size)) # replace 10 with number of variables in input layer
-    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=infos.learning_rate))
-    model_copy.set_weights(model.get_weights())
-    return (model_copy)
-
-def eval(m1):
-    results = []
-    for episode in range(3):
-        state = env.reset()
-        state = np.reshape(state, [1, state_size])
-        steps = 0
-        done = False
-
-        while not done and steps < 200:
-            predicted_qvalues = m1.predict(state)
-            action = np.argmax(predicted_qvalues[0])
-            state, _, done, _ = env.step(action)
-            state = np.reshape(state, [1, state_size])
-            steps += 1
-        results.append(steps)
-    return np.mean(results)
-
-def play(m1):
-    while True:
-        state = env.reset()
-        state = np.reshape(state, [1, state_size])
-        done = False
-        while not done:
-            action = random.randint(0, 1)#np.argmax(m1.predict(state)[0])
-            new_state, reward, done, _ = env.step(action)
-            env.render()
-            state = new_state
-    env.close()
-
-
-def learn():
-    epsilon = infos.epsilon
-    m1 = init_model(state_size, action_size)
-    m2 = init_model(state_size, action_size)
-    m2.set_weights(m1.get_weights())
-    for episode in range(infos.episodes):
-        state = env.reset()
-        state = np.reshape(state, [1, state_size])
-        steps = 0
-        done = False
-
-        while not done:
-            predicted_qvalues = m2.predict(state)
-            action = policy(state, predicted_qvalues[0], epsilon)
-            new_state, reward, done, _ = env.step(action)
-            new_state = np.reshape(new_state, [1, state_size])
-            steps += 1
-            if done == True:
-                reward = infos.reward_values[0]
-            memory.append((state, action, reward, new_state, done))
-            m1, m2 = fit_model(state, action, reward, new_state, m1, m2, predicted_qvalues)
-            state = new_state
-
-        if len(memory) > 200 and (random.random() < 0.5):
-            print(f"*** memory replay for episode:{episode}")
-            minibatch = random.sample(memory, infos.batch_size)
-            ### check minibatch
-            for state, action, reward, new_state, done in minibatch:
-                predicted_qvalues = m1.predict(state)
-                action = policy(state, predicted_qvalues[0], epsilon)
-                m1, m2 = fit_model(state, action, reward, new_state, m1, m2, predicted_qvalues)
-
-        epsilon = epsilon * infos.epsilon_decay
-        if (epsilon < infos.epislon_min):
-            epsilon = infos.epislon_min
-
-        print(f'\nepisode = {episode}, total_steps = {steps} and epsilon == {round(epsilon, 3)}')
-        if episode % 10 == 0 and episode != 0:
-            print(f"evaluation m1 = {eval(m1)}")
-            m2.set_weights(m1.get_weights())
-
-        if episode % 50 == 0 and episode != 0:
-            save(f'outs/with_dqn_{episode}.hdf5', m1)
-    return m1
-
-if __name__ == "__main__":
-    #m1 = learn()
-    episode = sys.argv[1]
-    m1 = None
-    # m1 = init_model(state_size, action_size)
-    # m1.load_weights(f'outs/with_dqn_{episode}.hdf5')
-    play(m1)
\ No newline at end of file
diff --git a/q_table_bis.pkl b/q_table_bis.pkl
deleted file mode 100644
index 2f0fe38..0000000
Binary files a/q_table_bis.pkl and /dev/null differ
diff --git a/tabledeq.py b/tabledeq.py
index f08fe66..ddc8c81 100644
--- a/tabledeq.py
+++ b/tabledeq.py
@@ -4,6 +4,16 @@ import numpy as np
 from config import infos
 import pickle
+import sys
+import argparse
+
+def parse_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-l', '--load', help='Choose if you want to load an existing qtable', action='store_true')
+    parser.add_argument('-e', '--epochs', help='Choose the number of epochs you need for your training', type=int, default=200)
+    args = parser.parse_args()
+    return (args)
+
 
 
 def render_graph(avg_steps, avg_episodes, goal):
     if infos.graph == 1:
@@ -89,7 +99,7 @@ def learn():
         print(f'episode = {episode}, avg_steps = {round(avg_steps[-1], 3)} and epsilon == {round(infos.epsilon, 3)}')
 
     if avg_steps[-1] > 400:
-        with open("q_table_bis.pkl", "wb+") as f:
+        with open(f"q_table_bis.pkl", "wb+") as f:
             pickle.dump(Q_table, f)
         return
 
@@ -115,11 +125,15 @@ def play():
         state = new_state
     env.close()
 
-def read_cutie():
-    with open("q_table.pkl", "rb") as f:
+def read_cutie(file):
+    with open(file, "rb") as f:
         return pickle.load(f)
+
 
 if __name__ == "__main__":
-    # learn()
-    Q_table = read_cutie()
+    args = parse_arguments()
+    if args.load == False:
+        learn()
+    elif args.load == True:
+        Q_table = read_cutie("q_table.pkl")
     play()
\ No newline at end of file
diff --git a/weigths/with_dqn_550.hdf5 b/weigths/weights_dqn_550e
similarity index 100%
rename from weigths/with_dqn_550.hdf5
rename to weigths/weights_dqn_550e
diff --git a/weigths/with_dqn_100.hdf5 b/weigths/with_dqn_100.hdf5
deleted file mode 100644
index c6bd47a..0000000
Binary files a/weigths/with_dqn_100.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_150.hdf5 b/weigths/with_dqn_150.hdf5
deleted file mode 100644
index 25a2dc1..0000000
Binary files a/weigths/with_dqn_150.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_200.hdf5 b/weigths/with_dqn_200.hdf5
deleted file mode 100644
index f9fd7eb..0000000
Binary files a/weigths/with_dqn_200.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_250.hdf5 b/weigths/with_dqn_250.hdf5
deleted file mode 100644
index 63c48be..0000000
Binary files a/weigths/with_dqn_250.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_300.hdf5 b/weigths/with_dqn_300.hdf5
deleted file mode 100644
index dc5edf3..0000000
Binary files a/weigths/with_dqn_300.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_350.hdf5 b/weigths/with_dqn_350.hdf5
deleted file mode 100644
index b77b2de..0000000
Binary files a/weigths/with_dqn_350.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_400.hdf5 b/weigths/with_dqn_400.hdf5
deleted file mode 100644
index 0b6cf16..0000000
Binary files a/weigths/with_dqn_400.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_450.hdf5 b/weigths/with_dqn_450.hdf5
deleted file mode 100644
index e005587..0000000
Binary files a/weigths/with_dqn_450.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_50.hdf5 b/weigths/with_dqn_50.hdf5
deleted file mode 100644
index b97b8ef..0000000
Binary files a/weigths/with_dqn_50.hdf5 and /dev/null differ
diff --git a/weigths/with_dqn_500.hdf5 b/weigths/with_dqn_500.hdf5
deleted file mode 100644
index a2d4af4..0000000
Binary files a/weigths/with_dqn_500.hdf5 and /dev/null differ