# GRADED FUNCTION: backward_propagation

def backward_propagation(parameters, cache, X, Y):
    """
    Implement the backward propagation using the instructions above.

    Arguments:
    parameters -- python dictionary containing our parameters
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    X -- input data of shape (2, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    grads -- python dictionary containing your gradients with respect to different parameters
    """
    m = X.shape[1]

    # First, retrieve W1 and W2 from the dictionary "parameters".
    ### START CODE HERE ### (≈ 2 lines of code)
    W1 = parameters['W1']
    W2 = parameters['W2']
    ### END CODE HERE ###

    # Retrieve also A1 and A2 from dictionary "cache".
    ### START CODE HERE ### (≈ 2 lines of code)
    A1 = cache['A1']
    A2 = cache['A2']
    ### END CODE HERE ###

    # Backward propagation: calculate dW1, db1, dW2, db2.
    ### START CODE HERE ### (≈ 6 lines of code, corresponding to 6 equations on slide above)
    dZ2 = A2 - Y
    dW2 = (1.0 / m) * np.dot(dZ2, A1.T)
    db2 = (1.0 / m) * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = (1.0 / m) * np.dot(dZ1, X.T)
    db1 = (1.0 / m) * np.sum(dZ1, axis=1, keepdims=True)
    ### END CODE HERE ###

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads
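The "6 equations on slide above" referenced in the comment correspond directly to the six assignment lines in the code. Written out for this network (tanh hidden layer, sigmoid output with cross-entropy loss, as used in this exercise), they are:

```latex
dZ^{[2]} = A^{[2]} - Y
dW^{[2]} = \frac{1}{m}\, dZ^{[2]} A^{[1]\,T}
db^{[2]} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[2](i)}
dZ^{[1]} = W^{[2]\,T} dZ^{[2]} \ast \left(1 - \left(A^{[1]}\right)^{2}\right)
dW^{[1]} = \frac{1}{m}\, dZ^{[1]} X^{T}
db^{[1]} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[1](i)}
```

The factor $1 - (A^{[1]})^2$ is the derivative of tanh evaluated at $Z^{[1]}$, and $A^{[2]} - Y$ is the simplification that results from combining the sigmoid derivative with the cross-entropy loss.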
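A minimal sanity check is sketched below. The layer sizes (n_x=2, n_h=4, n_y=1), the 3 random examples, and the inline forward pass are illustrative assumptions so the snippet stands alone; in the actual assignment, parameters and cache come from the initialization and forward_propagation steps.

```python
import numpy as np

np.random.seed(1)
n_x, n_h, n_y, m = 2, 4, 1, 3            # assumed sizes for this sketch

X = np.random.randn(n_x, m)
Y = (np.random.rand(1, m) > 0.5).astype(float)

parameters = {
    "W1": np.random.randn(n_h, n_x) * 0.01,
    "b1": np.zeros((n_h, 1)),
    "W2": np.random.randn(n_y, n_h) * 0.01,
    "b2": np.zeros((n_y, 1)),
}

# Forward pass recomputed inline (tanh hidden layer, sigmoid output)
Z1 = np.dot(parameters["W1"], X) + parameters["b1"]
A1 = np.tanh(Z1)
Z2 = np.dot(parameters["W2"], A1) + parameters["b2"]
A2 = 1.0 / (1.0 + np.exp(-Z2))
cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}

grads = backward_propagation(parameters, cache, X, Y)
for name, g in grads.items():
    print(name, g.shape)
# Expected shapes: dW1 (4, 2), db1 (4, 1), dW2 (1, 4), db2 (1, 1),
# i.e. each gradient matches the shape of the parameter it updates.
```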