
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Simulations Indian Buffet Process
% Machine Learning Reading Group
%
% Francois Caron
% University of British Columbia
% October 2, 2007
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function indianBuffet()

% Simulation finite mixture model (using Polya Urn scheme)
% Show Allocation variables and induced partition
N=20; % Number of objects
K=6; % Number of clusters
alpha=1; % Scale parameter of the Dirichlet distribution
[c bin_c bin_part]=FMM(N,K,alpha);
plot1(bin_c,bin_part); %plot a realisation of a Dirichlet-multinomial and induced partition

% Simulation finite latent feature model (using hierarchical model)
N=20; % Number of objects
K=40; % Number of features
alpha=20;
[bin_lat bin_eq]=FFM(N,K,alpha);
plot2(bin_lat,bin_eq); % Plot a realisation and induced left-ordered matrix

% Simulation Indian Buffet process for different values of alpha
N=30;
alpha=1;
bin_ibp1=ibprnd(N,alpha);
alpha=5;
bin_ibp2=ibprnd(N,alpha);
alpha=10;
bin_ibp3=ibprnd(N,alpha);
plot3(bin_ibp1,bin_ibp2,bin_ibp3,[1,5,10]); % plot 3 realisations of IBP for different alpha


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Functions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



function [c bin_c bin_part]=FMM(N,K,alpha)

% Sample from a finite mixture model
% N objects, K clusters, scale parameter alpha
%
% c: allocation variable
% bin_c: allocation variable as a binary matrix
% bin_part: partition induced by c

c=zeros(N,1); %Allocation variable
bin_c=zeros(N,K); % Representation as a binary matrix
bin_part=zeros(N,K); % Representation of the partition
m=alpha/K*ones(1,K); % Count vector
n_k=0;
for k=1:N % Objects
   u=rand(1);
   c(k)=find(cumsum(m/sum(m))>u,1,'first'); % allocation variable
   bin_c(k,c(k))=1; % associated binary vector
   m=m+c(k,:); % Vector of counts
   % Look for order of appearance of new clusters
   if isempty(find(c(1:k-1)==c(k)))
       n_k=n_k+1;
       val(n_k)=c(k);
   end
end
bin_part(:,1:length(val))=bin_c(:,val); % Partition induced by allocation variables

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function [bin_lat bin_eq]=FFM(N,K,alpha)

for j=1:K % Features
   pi(j)=betarnd(alpha/K,1); % Probability of an object having feature j
   for k=1:N
       bin_lat(k,j)=binornd(1,pi(j)); % Bernoulli distribution
   end
end
% Left-ordered equivalence class for the finite latent feature model
coeffs=repmat((2.^(N-1:-1:0))',1,K);
matrix=sum(coeffs.*bin_lat,1);
[temp ind]=sort(matrix,'descend');
bin_eq=bin_lat(:,ind);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function bin_ibp=ibprnd(N,alpha)

% Sample from an indian Buffet Process
% N: Number of customers
% alpha: Scale parameter of the IBP

m=[]; %Count vector
bin_ibp=[]; % Binary matrix
for k=1:N
   ind_max=length(m);
   for j=1:ind_max
       bin_ibp(k,j)=binornd(1,m(j)/k);
   end
   new_ind=ind_max+1;
   bin_ibp(k,new_ind:new_ind+poissrnd(alpha/k)-1)=1;
   m=sum(bin_ibp,1);
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function plot1(bin_c,bin_part)

[N K]=size(bin_c);

figure('name','Mixture model')
imagesc(bin_c)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:K-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Clusters')
ylabel('Objects')
saveas(gca,'figure1','fig')
saveas(gca,'figure1','png')

figure('name','Mixture model')
subplot(1,2,1)
imagesc(bin_c)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:K-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Clusters')
ylabel('Objects')
title('Allocation variables')
subplot(1,2,2)
imagesc(bin_part)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:K-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Clusters')
ylabel('Objects')
title('Partition')
saveas(gca,'figure2','fig')
saveas(gca,'figure2','png')


function plot2(bin_lat,bin_eq)

[N,K]=size(bin_lat);

figure('name','Finite latent feature')
imagesc(bin_lat)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:K-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Features')
ylabel('Objects')
saveas(gca,'figure3','fig')
saveas(gca,'figure3','png')

figure('name','Finite latent feature')
subplot(1,2,1)
imagesc(bin_lat)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:K-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Features')
ylabel('Objects')
subplot(1,2,2)
imagesc(bin_eq)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:K-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Features')
ylabel('Objects')
saveas(gca,'figure4','fig')
saveas(gca,'figure4','png')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function plot3(bin_ibp1,bin_ibp2,bin_ibp3,alpha)

N=size(bin_ibp1,1);

figure('name','Indian Buffet')
subplot(1,3,1)
imagesc(bin_ibp1)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:100-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Features')
ylabel('Objects')
xlim([0.5 50])
title(['alpha=' num2str(alpha(1))])
subplot(1,3,2)
imagesc(bin_ibp2)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:100-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Features')
ylabel('Objects')
xlim([0.5 50])
title(['alpha=' num2str(alpha(2))])

subplot(1,3,3)
imagesc(bin_ibp3)
colormap(flipud(gray))
set(gca,'XAxisLocation','top','XTick',1.5:100-.5,'YTick',1.5:N-.5,...
   'XTickLabel',{},'YTickLabel',{},'GridLineStyle','-')
grid on
xlabel('Features')
ylabel('Objects')
xlim([0.5 50])
title(['alpha=' num2str(alpha(3))])
saveas(gca,'figure5','fig')
saveas(gca,'figure5','png')
