### BreadBasket Dataset Using the Apriori & FP-Growth Algorithms

In [1]:
import pandas as pd
import numpy as np

from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Read the transactions workbook, labelling its two columns 'Tx' and 'products',
# then preview the first five rows.
bbdata = pd.read_excel("BreadBasket.xlsx", names=["Tx", "products"])
bbdata.head()

Unnamed: 0,Tx,products
0,0,"MILK,BREAD,BISCUIT"
1,1,"BREAD,MILK,BISCUIT,CORNFLAKES"
2,2,"BREAD,TEA,BOURNVITA"
3,3,"JAM,MAGGI,BREAD,MILK"
4,4,"MAGGI,TEA,BISCUIT"


In [3]:
# Dimensions of the loaded frame as (rows, columns).
bbdata.shape

(20, 2)

In [4]:
# Turn each comma-separated 'products' string into a list of item names,
# giving one list per transaction.
dataset = [basket.split(",") for basket in bbdata["products"]]
dataset

[['MILK', 'BREAD', 'BISCUIT'],
 ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'],
 ['BREAD', 'TEA', 'BOURNVITA'],
 ['JAM', 'MAGGI', 'BREAD', 'MILK'],
 ['MAGGI', 'TEA', 'BISCUIT'],
 ['BREAD', 'TEA', 'BOURNVITA'],
 ['MAGGI', 'TEA', 'CORNFLAKES'],
 ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'],
 ['JAM', 'MAGGI', 'BREAD', 'TEA'],
 ['BREAD', 'MILK'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'SUGER', 'BOURNVITA'],
 ['BREAD', 'COFFEE', 'COCK'],
 ['BREAD', 'SUGER', 'BISCUIT'],
 ['COFFEE', 'SUGER', 'CORNFLAKES'],
 ['BREAD', 'SUGER', 'BOURNVITA'],
 ['BREAD', 'COFFEE', 'SUGER'],
 ['BREAD', 'COFFEE', 'SUGER'],
 ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]

### Apriori Algorithm

In [5]:
from mlxtend.preprocessing import TransactionEncoder

# One-hot encode the transactions: one row per transaction, one boolean
# column per distinct item. The encoder is fitted and applied in a single step.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
te_ary

array([[ True, False,  True, False, False, False, False, False,  True,
        False, False],
       [ True, False,  True, False, False,  True, False, False,  True,
        False, False],
       [False,  True,  True, False, False, False, False, False, False,
        False,  True],
       [False, False,  True, False, False, False,  True,  True,  True,
        False, False],
       [ True, False, False, False, False, False, False,  True, False,
        False,  True],
       [False,  True,  True, False, False, False, False, False, False,
        False,  True],
       [False, False, False, False, False,  True, False,  True, False,
        False,  True],
       [ True, False,  True, False, False, False, False,  True, False,
        False,  True],
       [False, False,  True, False, False, False,  True,  True, False,
        False,  True],
       [False, False,  True, False, False, False, False, False,  True,
        False, False],
       [ True, False, False,  True,  True,  True, False, Fal

In [6]:
# Wrap the boolean matrix in a DataFrame whose columns carry the item names
# learned by the encoder.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,BISCUIT,BOURNVITA,BREAD,COCK,COFFEE,CORNFLAKES,JAM,MAGGI,MILK,SUGER,TEA
0,True,False,True,False,False,False,False,False,True,False,False
1,True,False,True,False,False,True,False,False,True,False,False
2,False,True,True,False,False,False,False,False,False,False,True
3,False,False,True,False,False,False,True,True,True,False,False
4,True,False,False,False,False,False,False,True,False,False,True
5,False,True,True,False,False,False,False,False,False,False,True
6,False,False,False,False,False,True,False,True,False,False,True
7,True,False,True,False,False,False,False,True,False,False,True
8,False,False,True,False,False,False,True,True,False,False,True
9,False,False,True,False,False,False,False,False,True,False,False


In [7]:
# Frequent itemsets at 20% minimum support. With use_colnames left off,
# itemsets are reported as column indices rather than item names.
apriori(df, min_support=0.2, use_colnames=False)

Unnamed: 0,support,itemsets
0,0.35,(0)
1,0.2,(1)
2,0.65,(2)
3,0.4,(4)
4,0.3,(5)
5,0.25,(7)
6,0.25,(8)
7,0.3,(9)
8,0.35,(10)
9,0.2,"(0, 2)"


In [8]:
# Same Apriori run at 20% minimum support, with itemsets labelled by the
# DataFrame's column (item) names instead of column indices.
apriori(df, min_support=0.2, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.35,(BISCUIT)
1,0.2,(BOURNVITA)
2,0.65,(BREAD)
3,0.4,(COFFEE)
4,0.3,(CORNFLAKES)
5,0.25,(MAGGI)
6,0.25,(MILK)
7,0.3,(SUGER)
8,0.35,(TEA)
9,0.2,"(BISCUIT, BREAD)"


In [9]:
# Keep the Apriori result and append a 'length' column holding the number of
# items in each itemset, so itemsets can be filtered by size later.
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True).assign(
    length=lambda itemsets_df: itemsets_df['itemsets'].map(len)
)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.35,(BISCUIT),1
1,0.2,(BOURNVITA),1
2,0.65,(BREAD),1
3,0.4,(COFFEE),1
4,0.3,(CORNFLAKES),1
5,0.25,(MAGGI),1
6,0.25,(MILK),1
7,0.3,(SUGER),1
8,0.35,(TEA),1
9,0.2,"(BISCUIT, BREAD)",2


In [10]:
# Select the two-item itemsets whose support is at least 0.2.
frequent_itemsets.loc[
    frequent_itemsets['length'].eq(2) & frequent_itemsets['support'].ge(0.2)
]

Unnamed: 0,support,itemsets,length
9,0.2,"(BISCUIT, BREAD)",2
10,0.2,"(MILK, BREAD)",2
11,0.2,"(SUGER, BREAD)",2
12,0.2,"(TEA, BREAD)",2
13,0.2,"(CORNFLAKES, COFFEE)",2
14,0.2,"(SUGER, COFFEE)",2
15,0.2,"(MAGGI, TEA)",2


In [11]:
# Look up the row for the exact itemset {BREAD, MILK}; each stored itemset is
# compared against the target set element by element.
frequent_itemsets[
    frequent_itemsets['itemsets'].map(lambda items: items == {'BREAD', 'MILK'})
]

Unnamed: 0,support,itemsets,length
10,0.2,"(MILK, BREAD)",2


In [12]:
from mlxtend.frequent_patterns import association_rules

# Derive association rules from the Apriori frequent itemsets, keeping only
# rules whose confidence is at least 0.6.
association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(MILK),(BREAD),0.25,0.65,0.2,0.8,1.230769,0.0375,1.75,0.25
1,(SUGER),(BREAD),0.3,0.65,0.2,0.666667,1.025641,0.005,1.05,0.035714
2,(CORNFLAKES),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8,0.571429
3,(SUGER),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8,0.571429
4,(MAGGI),(TEA),0.25,0.35,0.2,0.8,2.285714,0.1125,3.25,0.75


### FP-Growth Algorithm

In [13]:
from mlxtend.frequent_patterns import fpgrowth

# Mine frequent itemsets with FP-Growth at 30% minimum support. Without
# use_colnames, the itemsets are reported as column indices.
fpgrowth(df, min_support=0.3)

Unnamed: 0,support,itemsets
0,0.65,(2)
1,0.35,(0)
2,0.3,(5)
3,0.35,(10)
4,0.4,(4)
5,0.3,(9)


In [14]:
# Same FP-Growth run at 30% minimum support, with itemsets labelled by the
# DataFrame's column (item) names.
fpgrowth(df, min_support=0.3, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.65,(BREAD)
1,0.35,(BISCUIT)
2,0.3,(CORNFLAKES)
3,0.35,(TEA)
4,0.4,(COFFEE)
5,0.3,(SUGER)


In [15]:
from mlxtend.frequent_patterns import association_rules, fpgrowth

# Rules in this section are derived from FP-Growth itemsets rather than from
# the Apriori result (`frequent_itemsets`) computed earlier in the notebook.
# min_support=0.2 is used here instead of the 0.3 shown above: at 0.3 only
# single items are frequent, so there would be no pairs to build rules from.
fp_itemsets = fpgrowth(df, min_support=0.2, use_colnames=True)

# Keep only rules whose confidence is at least 0.8.
association_rules(fp_itemsets, metric="confidence", min_threshold=0.8)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(MILK),(BREAD),0.25,0.65,0.2,0.8,1.230769,0.0375,1.75,0.25
1,(MAGGI),(TEA),0.25,0.35,0.2,0.8,2.285714,0.1125,3.25,0.75


### Comparative Study of Apriori and FP-Growth

In [16]:
# Rebuild the one-hot encoded transaction frame used by the timing cells below.
# pandas and TransactionEncoder are already imported earlier in the notebook.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

In [17]:
from mlxtend.frequent_patterns import apriori

# Time Apriori at 60% minimum support: 10 runs of 100 loops each.
# NOTE(review): per the supports shown earlier, only BREAD (0.65) reaches 0.6,
# so this measures a near-trivial search on a 20-row dataset.
%timeit -n 100 -r 10 apriori(df, min_support=0.6)

978 µs ± 72.4 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [18]:
# Same Apriori benchmark (10 runs of 100 loops) with low_memory=True, for
# comparison against the default setting timed above.
%timeit -n 100 -r 10 apriori(df, min_support=0.6, low_memory=True)

1 ms ± 41 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [19]:
from mlxtend.frequent_patterns import fpgrowth

# Time FP-Growth with the same data, support threshold and timing settings
# (10 runs of 100 loops) used for the Apriori benchmarks.
%timeit -n 100 -r 10 fpgrowth(df, min_support=0.6)

662 µs ± 120 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
