import numpy as np
import numpy.linalg as LA

def f(x, y):  # objective: 2 * sum_i log(||x - y_i||)
    return 2 * np.sum(np.log(LA.norm(x - y, axis=1)))

def grad_f(x, y):  # gradient w.r.t. x: sum_i 2 * (x - y_i) / ||x - y_i||^2
    return np.sum(2 * (x - y) / (LA.norm(x - y, axis=1).reshape(-1, 1))**2, axis=0)
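
# Sanity check (an addition, not in the original code): compare grad_f against a
# central finite-difference approximation; eps is an arbitrary illustrative step.
def grad_check(x, y, eps=1e-6):
    g = np.zeros_like(x, dtype=float)
    for k in range(x.size):
        e = np.zeros(x.size)
        e[k] = eps
        g[k] = (f(x + e, y) - f(x - e, y)) / (2 * eps)  # central difference in coordinate k
    return g
# e.g. np.allclose(grad_check(x0, y), grad_f(x0, y), atol=1e-5) should hold for a
# test point x0 away from the rows of y.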

def min_lr_search(x, y, change):
    # Exact line search by brute force: evaluate f at 1000 step sizes in [0, 1]
    # along the search direction and return the step that minimizes it.
    lr_list = np.linspace(0., 1., 1000)
    values = np.array([f(x + lr * change, y) for lr in lr_list])
    return lr_list[np.argmin(values)]
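
# Alternative line search (a sketch assuming SciPy is available; not part of the
# original code): a bounded scalar minimizer can replace the 1000-point grid.
def min_lr_search_scipy(x, y, change):
    from scipy.optimize import minimize_scalar
    res = minimize_scalar(lambda lr: f(x + lr * change, y),
                          bounds=(0., 1.), method='bounded')
    return res.x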
"""
For beta:
Fletcher-Reeves formula
beta = np.max((0, (change[i].T @ change[i])/(change[i-1].T @ change[i-1])))
Polak-Ribière formula
beta = np.max((0, (change[i].T @ (change[i] - change[i-1])) / (change[i-1].T @ change[i-1]) ))
"""
def grad_desc_conj_grad(x, y):
    c = []        # conjugate search directions
    change = []   # steepest-descent directions (-gradient) at each iterate
    x_hist = np.array(x)
    # First step: plain steepest descent with an exact line search.
    change.append(-grad_f(x, y))
    c.append(change[0])
    min_lr = min_lr_search(x, y, c[0])
    x = x + min_lr * c[0]
    x_hist = np.vstack((x_hist, x))
    for i in range(1, 30):
        change.append(-grad_f(x, y))
        beta = np.max((0, (change[i].T @ (change[i] - change[i-1])) / (change[i-1].T @ change[i-1])))  # PR
        # beta = np.max((0, (change[i].T @ change[i]) / (change[i-1].T @ change[i-1])))  # FR
        c.append(change[i] + beta * c[i-1])
        min_lr = min_lr_search(x, y, c[i])
        if min_lr != 0:  # skip the update when the line search finds no descent step
            x = x + min_lr * c[i]
            x_hist = np.vstack((x_hist, x))
    return x_hist

# Example run: starting point x and three anchor points y.
x = np.array([1., 1.5])
y = np.array([[1., 0.], [-1., 1.], [-1.5, -1.]])
x_hist2 = grad_desc_conj_grad(x, y)
print("x_hist2.shape =", x_hist2.shape)
print(x_hist2[-1])
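
# Optional check (an addition, not in the original code): every accepted step
# came from an exact line search that only moves on strict improvement, so the
# objective should be non-increasing along the recorded iterates.
vals = np.array([f(p, y) for p in x_hist2])
print("f decreased monotonically:", bool(np.all(np.diff(vals) <= 0)))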