# coding: utf-8

# # Interpolative Decomposition

# In[1]:

import numpy as np
import scipy.linalg as la
import matplotlib.pyplot as pt


# ## Obtain a low-rank matrix

# In[5]:

n = 100
A0 = np.random.randn(n, n)
U0, sigma0, VT0 = la.svd(A0)
print(la.norm((U0*sigma0).dot(VT0) - A0))

sigma = np.exp(-np.arange(n))

A = (U0 * sigma).dot(VT0)
pt.semilogy(sigma)


# ## Run the factorization

# In[4]:

import scipy.linalg.interpolative as sli


# Compute a fixed-rank factorization:
# 
# (There's also an adaptive, fixed-precision mode.)

# In[17]:

k = 20
idx, proj = sli.interp_decomp(A, k)


# What does `numpy.argsort` do?

# In[25]:

idx


# In[24]:

sort_idx = np.argsort(idx)


# In[23]:

idx[sort_idx]


# Reconstruct the matrix:

# In[30]:

B = A[:,idx[:k]]
P = np.hstack([np.eye(k), proj])[:,np.argsort(idx)]
Aapprox = np.dot(B, P)


# In[31]:

la.norm(A - Aapprox, 2)


# What's the structure of $P$?
# 
# (ignoring the column permuation)

# In[29]:

pt.imshow(np.hstack([np.eye(k), proj]))


# * Why don't we use *just* the ID then?
# * Reasonable question, answer is: we should.
# * BUT: The ID as called above is based on the same randomized machinery that we've built up, so it's not like we'd *actually* reduce complexity that way.