dota2-draft-analysis

Draft Analysis of Dota 2

# # Importing Packages
import os
import copy
import config
import dota2api
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB

%matplotlib inline

# # Steam Web API key used to access the Dota 2 API; it is read from the local
# # `config` module so the key itself does not appear in this notebook
API_KEY = config.STEAM_API_KEY
api = dota2api.Initialise(API_KEY)

# # Load the draft dataset from a CSV file (the path is relative to the working directory)
data = pd.read_csv('../data/draft/capmodedata.csv')

# # Preview the first five rows
data.head(5)

.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }

	match_id	radiant_win	duration	1is_pick	1team_id	1hero_id	2is_pick	2team_id	2hero_id	3is_pick	...	17hero_id	18is_pick	18team_id	18hero_id	19is_pick	19team_id	19hero_id	20is_pick	20team_id	20hero_id
0	1928898739	True	2885	False	0	73	False	1	69	False	...	41.0	False	0.0	81.0	True	1.0	105.0	True	0.0	94.0
1	1928932285	True	1894	False	0	69	False	1	102	False	...	40.0	False	0.0	28.0	True	1.0	97.0	True	0.0	62.0
2	1928903165	True	2424	False	1	85	False	0	68	False	...	30.0	False	1.0	47.0	True	0.0	58.0	True	1.0	33.0
3	1928907204	True	2588	False	1	71	False	0	73	False	...	30.0	False	1.0	68.0	True	0.0	50.0	True	1.0	21.0
4	1928865084	True	3658	False	1	12	False	0	42	False	...	93.0	False	1.0	67.0	True	0.0	72.0	True	1.0	40.0

5 rows × 63 columns

# # List every column name in the dataset
data.columns

Index(['match_id', 'radiant_win', 'duration', '1is_pick', '1team_id',
       '1hero_id', '2is_pick', '2team_id', '2hero_id', '3is_pick', '3team_id',
       '3hero_id', '4is_pick', '4team_id', '4hero_id', '5is_pick', '5team_id',
       '5hero_id', '6is_pick', '6team_id', '6hero_id', '7is_pick', '7team_id',
       '7hero_id', '8is_pick', '8team_id', '8hero_id', '9is_pick', '9team_id',
       '9hero_id', '10is_pick', '10team_id', '10hero_id', '11is_pick',
       '11team_id', '11hero_id', '12is_pick', '12team_id', '12hero_id',
       '13is_pick', '13team_id', '13hero_id', '14is_pick', '14team_id',
       '14hero_id', '15is_pick', '15team_id', '15hero_id', '16is_pick',
       '16team_id', '16hero_id', '17is_pick', '17team_id', '17hero_id',
       '18is_pick', '18team_id', '18hero_id', '19is_pick', '19team_id',
       '19hero_id', '20is_pick', '20team_id', '20hero_id'],
      dtype='object')

# # # Fetch the hero list from the API and build an id -> localized name lookup
heroes = api.get_heroes()

heroes_dict = {
    hero['id']: hero['localized_name']
    for hero in heroes['heroes']
}

def change_hero_name(row):
    """Return the localized hero name stored in ``heroes_dict`` for ``row``.

    ``row`` is the lookup key (a hero id). When the key is not present in
    ``heroes_dict`` the function returns ``None``.
    """
    return heroes_dict.get(row)

# # Names of the hero-id columns for draft slots 1 through 20
columns = ['{}hero_id'.format(slot) for slot in range(1, 21)]

# # Replace the hero ids in every draft-slot column with hero names
for hero_column in columns:
    data[hero_column] = data[hero_column].apply(change_hero_name)

data.head(5)

.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }

	match_id	radiant_win	duration	1is_pick	1team_id	1hero_id	2is_pick	2team_id	2hero_id	3is_pick	...	17hero_id	18is_pick	18team_id	18hero_id	19is_pick	19team_id	19hero_id	20is_pick	20team_id	20hero_id
0	1928898739	True	2885	False	0	Alchemist	False	1	Doom	False	...	Faceless Void	False	0.0	Chaos Knight	True	1.0	Techies	True	0.0	Medusa
1	1928932285	True	1894	False	0	Doom	False	1	Abaddon	False	...	Venomancer	False	0.0	Slardar	True	1.0	Magnus	True	0.0	Bounty Hunter
2	1928903165	True	2424	False	1	Undying	False	0	Ancient Apparition	False	...	Witch Doctor	False	1.0	Viper	True	0.0	Enchantress	True	1.0	Enigma
3	1928907204	True	2588	False	1	Spirit Breaker	False	0	Alchemist	False	...	Witch Doctor	False	1.0	Ancient Apparition	True	0.0	Dazzle	True	1.0	Windranger
4	1928865084	True	3658	False	1	Phantom Lancer	False	0	Wraith King	False	...	Slark	False	1.0	Spectre	True	0.0	Gyrocopter	True	1.0	Venomancer

5 rows × 63 columns

Getting the longest match from the dataset (in minutes)

# # Largest value of the duration column, divided by 60
max_minutes = data['duration'].max() / 60
max_minutes

161.26666666666668

# # Add a `minutes` column holding duration / 60 for every match
data['minutes'] = data['duration'] / 60

# # Per-column non-null counts for every distinct `minutes` value
groupby_minute = data.groupby('minutes').count()

groupby_minute.index

Float64Index([0.03333333333333333, 0.26666666666666666, 0.36666666666666664,
               0.5166666666666667,                0.55,  0.5833333333333334,
               0.6333333333333333,                0.65,                 0.8,
               0.8166666666666667,
              ...
               127.83333333333333,              129.05,              136.35,
                           138.05,              138.35,  140.86666666666667,
               142.46666666666667,               143.2,               153.3,
               161.26666666666668],
             dtype='float64', name='minutes', length=5184)

# # Histogram of match durations in minutes. Plot the per-match `minutes` column
# # rather than groupby_minute.index: the index holds every distinct duration only
# # once, so a histogram of it ignores how many matches share each duration.
plt.hist(data['minutes'], histtype='bar', color='green')

(array([819., 967., 968., 967., 949., 444.,  50.,  11.,   7.,   2.]),
 array([3.33333333e-02, 1.61566667e+01, 3.22800000e+01, 4.84033333e+01,
        6.45266667e+01, 8.06500000e+01, 9.67733333e+01, 1.12896667e+02,
        1.29020000e+02, 1.45143333e+02, 1.61266667e+02]),
 <a list of 10 Patch objects>)

png

Getting the wins on both sides (Radiant / Dire)

As there should be no bias, let's see which side of the map has more wins, or whether they are equal.

# # Matches where radiant_win is True
base1 = data[data['radiant_win'].eq(True)]

len(base1)

# # Matches where radiant_win is False
base2 = data[data['radiant_win'].eq(False)]

len(base2)

# # Validation: the two subsets together should account for every row of data
total = len(base1) + len(base2)
print(total)
print(len(data))

461050
461050

# # Bar chart of the number of matches in base1 (radiant_win True) and base2 (radiant_win False)
temp_a = ['radiant', 'dire']
temp_b = [len(side) for side in (base1, base2)]

plt.bar(temp_a, temp_b)

<BarContainer object of 2 artists>

png

Let's Analyze the Draft

alt text

Basic Draft Rules

First team to get ban also gets last ban
First team to get ban gets first pick
Second team to get ban gets second and third pick consecutively
Second team to get ban gets last pick

Getting the first ban heroes

# # Hero names recorded in draft slot 1
ban_1 = data['1hero_id']

# # Number of matches per hero in draft slot 1
ban1_count = data.groupby('1hero_id').size()

plt.rcParams['figure.figsize'] = (22, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(ban1_count.index, ban1_count.values)

[<matplotlib.lines.Line2D at 0x7f630a86b7b8>]

png

From this figure, we can say that the most banned heroes in the first ban slot are Silencer, Undying and Doom.

Getting the second ban heroes

# # Number of matches per hero in draft slot 2
ban2_count = data.groupby('2hero_id').size()

plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(ban2_count.index, ban2_count.values)

[<matplotlib.lines.Line2D at 0x7f630a709e48>]

png

The second ban phase also shows that Silencer, Undying and Doom are the most banned heroes, the same as in the first ban phase.

Getting the third ban heroes

# # Number of matches per hero in draft slot 3
ban3_count = data.groupby('3hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(ban3_count.index, ban3_count.values)

[<matplotlib.lines.Line2D at 0x7f6309da1518>]

png

The third ban phase has a different pattern, which shows a rise of Anti-Mage, Bloodseeker, Doom, Earthshaker, Ogre Magi, Phantom Lancer, Spirit Breaker, Storm Spirit and Winter Wyvern, a total of 9 new heroes among the bans.

Getting the fourth ban heroes

# # Number of matches per hero in draft slot 4
ban4_count = data.groupby('4hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(ban4_count.index, ban4_count.values)

[<matplotlib.lines.Line2D at 0x7f6304f7a630>]

png

The fourth ban phase shows a similar pattern to the third ban phase with an exclusion of 2 heroes, Queen of Pain and Riki trying to come out on top.

Getting bans from all phases (1, 2, 3, 4, 9, 10, 11, 12, 17, 18)

# # Hero names in the iteration order of heroes_dict
heroes_list = list(heroes_dict.values())

# # One column per ban slot, one row per hero
total_ban = 10
columns = ['1hero_id', '2hero_id', '3hero_id', '4hero_id', '9hero_id', '10hero_id', '11hero_id', '12hero_id', '17hero_id', '18hero_id']
all_data = pd.DataFrame(columns=columns, index=heroes_list)

# # Sorting alphabetically. sort_index() returns a new frame instead of sorting
# # in place, so the result has to be assigned back to take effect.
all_data = all_data.sort_index()

# # Per-hero match counts for each ban slot; assignment aligns on the hero-name index
for ban_column in columns[:total_ban]:
    all_data[ban_column] = data.groupby(ban_column).size()

all_data

# # For this context, a null value is the 0 value (i.e. the hero was not banned in
# # its respective phase). fillna() also returns a new frame, so keep the result.
all_data = all_data.fillna(value=0)
all_data

From the table above, we can see that 4 heroes (Monkey King, Pangolier, Dark Willow and Grimstroke) have 0 bans. This is because the heroes API has since been updated on the Steam client and now returns these new heroes, which were not part of the old meta.

# # Sum each hero's row across all ban-slot columns (axis=1) to get one total per hero
sum_all_data = all_data.sum(axis=1)

sum_all_data

Anti-Mage              199406.0
Axe                     52844.0
Bane                     6910.0
Bloodseeker            134096.0
Crystal Maiden          23970.0
Drow Ranger             11871.0
Earthshaker            132579.0
Juggernaut              41749.0
Mirana                   8839.0
Shadow Fiend           111092.0
Morphling                1401.0
Phantom Lancer         244466.0
Puck                     9647.0
Pudge                   22237.0
Razor                   14361.0
Sand King                5403.0
Storm Spirit           108197.0
Sven                    42068.0
Tiny                    17019.0
Vengeful Spirit          3786.0
Windranger              57866.0
Zeus                    54209.0
Kunkka                   2468.0
Lina                    49600.0
Lich                    23307.0
Lion                    31229.0
Shadow Shaman           13323.0
Slardar                 55222.0
Tidehunter              68210.0
Witch Doctor            71820.0
                         ...   
Nyx Assassin             6991.0
Naga Siren              10244.0
Keeper of the Light      2856.0
Io                      16178.0
Visage                   1393.0
Slark                   55320.0
Medusa                  38105.0
Troll Warlord           12735.0
Centaur Warrunner        8064.0
Magnus                  46969.0
Timbersaw                4686.0
Bristleback             71463.0
Tusk                    63891.0
Skywrath Mage            6864.0
Abaddon                  7679.0
Elder Titan               774.0
Legion Commander        31394.0
Ember Spirit            43802.0
Earth Spirit               11.0
Terrorblade                26.0
Phoenix                  8912.0
Oracle                      1.0
Techies                 16057.0
Winter Wyvern           64459.0
Arc Warden                 17.0
Underlord                   9.0
Monkey King                 0.0
Pangolier                   0.0
Dark Willow                 0.0
Grimstroke                  0.0
Length: 116, dtype: float64

# # Plot the per-hero ban totals as a line over the hero names

plt.grid()
plt.plot(sum_all_data.index, sum_all_data.values)
# # Rotate the hero names so the x-axis labels are drawn vertically
plt.xticks(rotation='vertical')
# # y-axis ticks every 50000 from 0 up to 300000
plt.yticks(np.arange(0, 350000, 50000))

([<matplotlib.axis.YTick at 0x7f6304e4e5f8>,
  <matplotlib.axis.YTick at 0x7f6304e4e1d0>,
  <matplotlib.axis.YTick at 0x7f6304e5f278>,
  <matplotlib.axis.YTick at 0x7f6304cefda0>,
  <matplotlib.axis.YTick at 0x7f6304cf7320>,
  <matplotlib.axis.YTick at 0x7f6304cf7860>,
  <matplotlib.axis.YTick at 0x7f6304cf7da0>],
 <a list of 7 Text yticklabel objects>)

png

The above graph shows the most banned heroes across all 10 ban phases; most heroes show a good distribution, except for 3 heroes (Phantom Lancer, Silencer and Undying).

Similarly, Getting the first pick heroes

# # Number of matches per hero in draft slot 5
pick1_count = data.groupby('5hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(pick1_count.index, pick1_count.values)

[<matplotlib.lines.Line2D at 0x7f6304c41860>]

png

This figure shows that picks are not as concentrated as bans, since there are many hero options to pick from. These heroes include Crystal Maiden, Dazzle, Earthshaker, Undying and Witch Doctor. It also shows that Undying appears on both graphs (pick and ban), which means that Undying was really popular in the meta.

Getting the second picked heroes

# # Number of matches per hero in draft slot 6
pick2_count = data.groupby('6hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(pick2_count.index, pick2_count.values)

[<matplotlib.lines.Line2D at 0x7f6304acd710>]

png

Similar to the above graph, with the exception of heroes like Lion and Omniknight.

Getting the third pick heroes

# # Number of matches per hero in draft slot 7
pick3_count = data.groupby('7hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(pick3_count.index, pick3_count.values)

[<matplotlib.lines.Line2D at 0x7f630495b438>]

png

As the sixth and seventh draft slots always belong to the same team, we can conclude that the heroes of this graph and the one above can be paired in a game: heroes like Crystal Maiden, Dazzle, Disruptor, Earthshaker, Lina, Lion, Ogre Magi, Omniknight, Shadow Shaman, Tusk, Undying and Witch Doctor.

Getting graph for the most pick

# # One column per pick slot, one row per hero
total_pick = 10
columns = ['5hero_id', '6hero_id', '7hero_id', '8hero_id', '13hero_id', '14hero_id', '15hero_id', '16hero_id', '19hero_id', '20hero_id']
pick_all_data = pd.DataFrame(columns=columns, index=heroes_list)

pick_all_data

# # Per-hero match counts for each pick slot; assignment aligns on the hero-name index
for pick_column in columns[:total_pick]:
    pick_all_data[pick_column] = data.groupby(pick_column).size()

# # As in our context, we fill null values with 0 (i.e. not picked = 0).
# # fillna() returns a new frame, so the result must be assigned back.
pick_all_data = pick_all_data.fillna(0)

# # Total picks per hero across all pick slots
sum_all_pick_data = pick_all_data.sum(axis=1)

sum_all_pick_data.head(2)

Anti-Mage    62255.0
Axe          60921.0
dtype: float64

# # Plot the per-hero pick totals as a line over the hero names
plt.grid()
plt.plot(sum_all_pick_data.index, sum_all_pick_data.values)
# # Rotate the hero names so the x-axis labels are drawn vertically
plt.xticks(rotation='vertical')
# # y-axis ticks every 50000 from 0 up to 250000
plt.yticks(np.arange(0, 300000, 50000))

([<matplotlib.axis.YTick at 0x7f6304761438>,
  <matplotlib.axis.YTick at 0x7f6304819e48>,
  <matplotlib.axis.YTick at 0x7f63047a4b38>,
  <matplotlib.axis.YTick at 0x7f63046c6d68>,
  <matplotlib.axis.YTick at 0x7f63046c0a58>,
  <matplotlib.axis.YTick at 0x7f63047106d8>],
 <a list of 6 Text yticklabel objects>)

png

Although the ban phase had a really uneven distribution, the pick distribution was actually much better for the meta compared to the ban phase graph, which means that all the heroes got playtime in the game.

Getting the most popular hero pairings on the basis of win rate

We can take win rates for 1st picked team. (i.e: 5, 8, 14, 16, 19)

We can take win rates for 2nd picked team. (i.e: 6, 7, 13, 15, 20)

# # Row masks on the team id of draft slot 1 and on the match outcome
slot1_is_team0 = data['1team_id'] == 0
slot1_is_team1 = data['1team_id'] == 1
radiant_won = data['radiant_win'] == True
radiant_lost = data['radiant_win'] == False

# # Slot-1 team 0 with a Radiant win, followed by slot-1 team 1 with a Radiant loss
# # (presumably team id 0 is Radiant, making these the wins of the team owning slot 1 - TODO confirm)
all_win_data_only_1 = pd.concat([
    data.loc[slot1_is_team0 & radiant_won],
    data.loc[slot1_is_team1 & radiant_lost],
])

# # The complementary matches: slot-1 team 0 with a Radiant loss, then slot-1 team 1 with a Radiant win
all_win_data_only_2 = pd.concat([
    data.loc[slot1_is_team0 & radiant_lost],
    data.loc[slot1_is_team1 & radiant_won],
])

# # Validation: the two winner subsets together should contain every match
for winners in (all_win_data_only_1, all_win_data_only_2):
    print(winners.shape)
print(len(all_win_data_only_1) + len(all_win_data_only_2))
print(len(data))

(233738, 64)
(227312, 64)
461050
461050

# # Display only: sort_index() returns a sorted copy that is not stored,
# # so all_win_data_only_1 itself keeps its concatenation order
all_win_data_only_1.sort_index()

# # Display only, same as above for the second subset
all_win_data_only_2.sort_index()

# # Getting the hero with the most wins in each pick slot.
# # The first `change_count` slots (5, 8, 14, 16, 19) are counted in all_win_data_only_1,
# # the remaining slots (6, 7, 13, 15, 20) in all_win_data_only_2.
total_pick_heroes = 10
pick_heroes_range = [5, 8, 14, 16, 19, 6, 7, 13, 15, 20]
change_count = 5
# # Maps hero name -> wins summed over every slot in which that hero is the top winner
most_picked_hero_win_list = {}
for position, slot in enumerate(pick_heroes_range[:total_pick_heroes]):
    hero_id = '{}hero_id'.format(slot)
    winners = all_win_data_only_1 if position < change_count else all_win_data_only_2
    # # Group and sort once, then read both the top hero and its count from the same Series
    wins_per_hero = winners.groupby([hero_id]).size().sort_values(ascending=False)
    most_win_hero = wins_per_hero.index[0]
    # # .iloc selects by position; plain [0] on a name-labelled Series relies on a
    # # positional fallback that recent pandas releases deprecate
    most_win_hero_stats = wins_per_hero.iloc[0]
    most_picked_hero_win_list[most_win_hero] = (
        most_picked_hero_win_list.get(most_win_hero, 0) + most_win_hero_stats
    )

# # Total number of matches in the dataset
print('Total Games: ', len(data))

Total Games:  461050

# # Display the hero -> accumulated win count mapping
most_picked_hero_win_list

{'Earthshaker': 71882,
 'Bristleback': 13658,
 'Gyrocopter': 14346,
 'Windranger': 14129}

# # Validation: the accumulated wins of the top heroes must stay below the number of games.
# # Computed from the live objects rather than from numbers copied out of an earlier output,
# # so the check stays meaningful when the data changes.
sum(most_picked_hero_win_list.values()) < data.shape[0]

True

Getting the most winning pairs of 5

# # Full five-hero line-ups of the winning side.
# # Slots 5, 8, 14, 16, 19 are counted in all_win_data_only_1,
# # slots 6, 7, 13, 15, 20 in all_win_data_only_2.
first_pick_columns = ['5hero_id', '8hero_id', '14hero_id', '16hero_id', '19hero_id']
second_pick_columns = ['6hero_id', '7hero_id', '13hero_id', '15hero_id', '20hero_id']
grouped_win_1 = all_win_data_only_1.groupby(first_pick_columns).size()
grouped_win_2 = all_win_data_only_2.groupby(second_pick_columns).size()

temp_groups = [grouped_win_1, grouped_win_2]

# # Merge both tallies, add up identical hero tuples and order by win count, highest first
winning_combination_5 = pd.concat(temp_groups)
winning_combination_5 = (
    winning_combination_5
    .groupby(winning_combination_5.index)
    .sum()
    .sort_values(ascending=False)
)

(Shadow Fiend, Winter Wyvern, Gyrocopter, Rubick, Ember Spirit)    3
(Earthshaker, Witch Doctor, Spirit Breaker, Anti-Mage, Sven)       3
(Lion, Warlock, Shadow Fiend, Windranger, Invoker)                 3
dtype: int64

# # The Series is sorted in descending order, so the first element is the highest count.
# # .iloc selects by position; plain [0] on a tuple-labelled Series relies on a
# # positional fallback that recent pandas releases deprecate.
max_value = winning_combination_5.iloc[0]
# # Every combination that reaches the highest count
winning_combination_5.loc[winning_combination_5.values == max_value]

(Shadow Fiend, Winter Wyvern, Gyrocopter, Rubick, Ember Spirit)             3
(Earthshaker, Witch Doctor, Spirit Breaker, Anti-Mage, Sven)                3
(Lion, Warlock, Shadow Fiend, Windranger, Invoker)                          3
(Earthshaker, Shadow Fiend, Dark Seer, Ember Spirit, Windranger)            3
(Witch Doctor, Crystal Maiden, Bristleback, Sven, Queen of Pain)            3
(Ancient Apparition, Lion, Queen of Pain, Phantom Lancer, Ember Spirit)     3
(Witch Doctor, Spirit Breaker, Io, Tiny, Magnus)                            3
(Witch Doctor, Tidehunter, Enigma, Sven, Shadow Fiend)                      3
(Winter Wyvern, Earthshaker, Clockwerk, Windranger, Ember Spirit)           3
(Shadow Fiend, Earthshaker, Clockwerk, Ancient Apparition, Ember Spirit)    3
(Shadow Fiend, Earthshaker, Gyrocopter, Clockwerk, Bounty Hunter)           3
(Winter Wyvern, Earthshaker, Dark Seer, Shadow Fiend, Windranger)           3
(Omniknight, Crystal Maiden, Bristleback, Phantom Assassin, Viper)          3
(Omniknight, Phantom Assassin, Shadow Shaman, Wraith King, Anti-Mage)       3
(Crystal Maiden, Enigma, Magnus, Sven, Shadow Fiend)                        3
(Crystal Maiden, Vengeful Spirit, Luna, Drow Ranger, Sven)                  3
(Earthshaker, Dazzle, Bristleback, Shadow Fiend, Faceless Void)             3
dtype: int64

# # Validating: the number of distinct combinations must be below 118 to the power of 5
len(winning_combination_5) < 118 ** 5

True

These combinations have the highest win count (i.e. 3). Out of more than four hundred thousand matches, only 17 combinations reach this top count, and it is only 3 wins. This shows that picking the heroes with the best win rate doesn't necessarily win the match; there should be a good combination of all the heroes on the same team.

Getting the most winning pairs of 4

# # Validation
# # C = Combination
# # 5C4 = 5, so each team contributes five four-slot groupings

# # Slot numbers of every four-hero grouping.
# # Slots 5, 8, 14, 16, 19 are counted in all_win_data_only_1,
# # slots 6, 7, 13, 15, 20 in all_win_data_only_2.
first_pick_quads = [
    (5, 8, 14, 16), (5, 8, 14, 19), (5, 8, 16, 19), (5, 14, 16, 19), (8, 14, 16, 19),
]
second_pick_quads = [
    (6, 7, 13, 15), (6, 7, 13, 20), (6, 7, 15, 20), (6, 13, 15, 20), (7, 13, 15, 20),
]

temp_groups = []
for winners, quads in ((all_win_data_only_1, first_pick_quads), (all_win_data_only_2, second_pick_quads)):
    for quad in quads:
        key_columns = ['{}hero_id'.format(slot) for slot in quad]
        temp_groups.append(winners.groupby(key_columns).size())

# # Merge all tallies, add up identical hero tuples and order by win count, highest first
winning_combination_4 = pd.concat(temp_groups)
winning_combination_4 = (
    winning_combination_4
    .groupby(winning_combination_4.index)
    .sum()
    .sort_values(ascending=False)
)

# # The Series is sorted in descending order, so the first element is the highest count.
# # .iloc selects by position; plain [0] on a tuple-labelled Series relies on a
# # positional fallback that recent pandas releases deprecate.
max_value = winning_combination_4.iloc[0]
# # Every combination that reaches the highest count
winning_combination_4.loc[winning_combination_4.values == max_value]

(Crystal Maiden, Vengeful Spirit, Luna, Drow Ranger)    27
dtype: int64

# # Validating: the number of distinct combinations must be below 118 to the power of 4
len(winning_combination_4) < 118 ** 4

True

Getting the most winning pair of 3

# # Slot numbers of every three-hero grouping (ten per team).
# # Slots 5, 8, 14, 16, 19 are counted in all_win_data_only_1,
# # slots 6, 7, 13, 15, 20 in all_win_data_only_2.
first_pick_triples = [
    (5, 8, 14), (5, 8, 16), (5, 8, 19), (8, 14, 16), (8, 14, 19),
    (14, 16, 19), (5, 16, 19), (8, 16, 19), (5, 14, 19), (5, 14, 16),
]
second_pick_triples = [
    (6, 7, 13), (6, 7, 15), (6, 7, 20), (7, 13, 15), (7, 13, 20),
    (13, 15, 20), (6, 15, 20), (7, 15, 20), (6, 13, 20), (6, 13, 15),
]

temp_groups = []
for winners, triples in ((all_win_data_only_1, first_pick_triples), (all_win_data_only_2, second_pick_triples)):
    for triple in triples:
        key_columns = ['{}hero_id'.format(slot) for slot in triple]
        temp_groups.append(winners.groupby(key_columns).size())

# # Merge all tallies, add up identical hero tuples and order by win count, highest first
winning_combination_3 = pd.concat(temp_groups)
winning_combination_3 = (
    winning_combination_3
    .groupby(winning_combination_3.index)
    .sum()
    .sort_values(ascending=False)
)

# # The Series is sorted in descending order, so the first element is the highest count.
# # .iloc selects by position; plain [0] on a tuple-labelled Series relies on a
# # positional fallback that recent pandas releases deprecate.
max_value = winning_combination_3.iloc[0]
# # Every combination that reaches the highest count
winning_combination_3.loc[winning_combination_3.values == max_value]

(Earthshaker, Witch Doctor, Windranger)    443
dtype: int64

# # Validating: the number of distinct combinations must be below 118 to the power of 3
len(winning_combination_3) < 118 ** 3

True

Getting the most winning pair of 2

# # We can take win rates for 1st picked team. (i.e: 5, 8, 14, 16, 19)
# # We can take win rates for 2nd picked team. (i.e: 6, 7, 13, 15, 20)
from itertools import combinations

first_pick_columns = ['5hero_id', '8hero_id', '14hero_id', '16hero_id', '19hero_id']
second_pick_columns = ['6hero_id', '7hero_id', '13hero_id', '15hero_id', '20hero_id']

# # Generate every two-slot pairing per team instead of listing the pairings by hand.
# # The hand-written list grouped on ('8hero_id', '19hero_id') twice and never on
# # ('5hero_id', '19hero_id'), so one pairing was double counted and one was missing;
# # counts recorded with that list may differ after a re-run.
temp_groups = []
for winners, team_columns in (
    (all_win_data_only_1, first_pick_columns),
    (all_win_data_only_2, second_pick_columns),
):
    for pair in combinations(team_columns, 2):
        temp_groups.append(winners.groupby(list(pair)).size())

winning_combination_2 = pd.concat(temp_groups)

# # Add up identical hero tuples and order by win count, highest first
winning_combination_2 = winning_combination_2.groupby(winning_combination_2.index).sum().sort_values(ascending=False)

# # The Series is sorted in descending order, so the first element is the highest count.
# # .iloc selects by position; plain [0] on a tuple-labelled Series relies on a
# # positional fallback that recent pandas releases deprecate.
max_value = winning_combination_2.iloc[0]
# # Every combination that reaches the highest count
winning_combination_2.loc[winning_combination_2.values == max_value]

(Earthshaker, Gyrocopter)    8309
dtype: int64

# # Validating: the number of distinct combinations must be below 118 squared
len(winning_combination_2) < 118 ** 2

True

From a Gamers Perspective

Dota totally depends on the meta. This draft is done on 6.* whereas the current meta is on 7.* So, this draft analysis, if used in real life on real games will not be good due to the meta changes.

Notebook Created by: Sulabh Shrestha

- Connect with me on Linkedin & Twitter

Contribute

Issue Tracker: https://github.com/codexponent/dota2-draft-analysis/issues
Source Code: https://github.com/codexponent/dota2-draft-analysis
Contributors: https://github.com/codexponent/dota2-draft-analysis/contributors.txt

Support

If you are having issues, please let us know. I have a mailing list located at: tsulabh4@gmail.com

Copyright and License

Built With

jupyter-notebook