Draft Analysis of Dota 2
# # Importing Packages
import os
import copy
import config
import dota2api
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
%matplotlib inline
# # STEAM Client API Key to access the dota2 api
# The key is read from the local ``config`` module so it is not hard-coded here.
API_KEY = config.STEAM_API_KEY
# API client; used further down to fetch the hero id -> name list.
api = dota2api.Initialise(API_KEY)
# Draft dataset. The path is relative to the notebook's working directory.
# NOTE(review): file name suggests Captains Mode matches — confirm with the data source.
data = pd.read_csv('../data/draft/capmodedata.csv')
# Preview the first five rows.
data.head(5)
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| match_id | radiant_win | duration | 1is_pick | 1team_id | 1hero_id | 2is_pick | 2team_id | 2hero_id | 3is_pick | ... | 17hero_id | 18is_pick | 18team_id | 18hero_id | 19is_pick | 19team_id | 19hero_id | 20is_pick | 20team_id | 20hero_id | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1928898739 | True | 2885 | False | 0 | 73 | False | 1 | 69 | False | ... | 41.0 | False | 0.0 | 81.0 | True | 1.0 | 105.0 | True | 0.0 | 94.0 |
| 1 | 1928932285 | True | 1894 | False | 0 | 69 | False | 1 | 102 | False | ... | 40.0 | False | 0.0 | 28.0 | True | 1.0 | 97.0 | True | 0.0 | 62.0 |
| 2 | 1928903165 | True | 2424 | False | 1 | 85 | False | 0 | 68 | False | ... | 30.0 | False | 1.0 | 47.0 | True | 0.0 | 58.0 | True | 1.0 | 33.0 |
| 3 | 1928907204 | True | 2588 | False | 1 | 71 | False | 0 | 73 | False | ... | 30.0 | False | 1.0 | 68.0 | True | 0.0 | 50.0 | True | 1.0 | 21.0 |
| 4 | 1928865084 | True | 3658 | False | 1 | 12 | False | 0 | 42 | False | ... | 93.0 | False | 1.0 | 67.0 | True | 0.0 | 72.0 | True | 1.0 | 40.0 |
5 rows × 63 columns
data.columns
Index(['match_id', 'radiant_win', 'duration', '1is_pick', '1team_id',
'1hero_id', '2is_pick', '2team_id', '2hero_id', '3is_pick', '3team_id',
'3hero_id', '4is_pick', '4team_id', '4hero_id', '5is_pick', '5team_id',
'5hero_id', '6is_pick', '6team_id', '6hero_id', '7is_pick', '7team_id',
'7hero_id', '8is_pick', '8team_id', '8hero_id', '9is_pick', '9team_id',
'9hero_id', '10is_pick', '10team_id', '10hero_id', '11is_pick',
'11team_id', '11hero_id', '12is_pick', '12team_id', '12hero_id',
'13is_pick', '13team_id', '13hero_id', '14is_pick', '14team_id',
'14hero_id', '15is_pick', '15team_id', '15hero_id', '16is_pick',
'16team_id', '16hero_id', '17is_pick', '17team_id', '17hero_id',
'18is_pick', '18team_id', '18hero_id', '19is_pick', '19team_id',
'19hero_id', '20is_pick', '20team_id', '20hero_id'],
dtype='object')
# # # Getting Heroes ID according to the name
heroes = api.get_heroes()
# Lookup table: numeric hero id -> localized hero name, in API response order.
heroes_dict = {
    hero['id']: hero['localized_name']
    for hero in heroes['heroes']
}
def change_hero_name(row):
    """Translate a hero id into the hero's display name.

    Args:
        row: A single cell value taken from one of the ``*hero_id`` columns.

    Returns:
        The matching name from ``heroes_dict``, or ``None`` when the value
        is not a key of ``heroes_dict`` (for example a missing value).
    """
    # dict.get performs one lookup and makes the "unknown id -> None"
    # result explicit instead of relying on falling off the end.
    return heroes_dict.get(row)
# The twenty draft slots are stored in columns '1hero_id' ... '20hero_id'.
columns = ['{}hero_id'.format(slot) for slot in range(1, 21)]
# Replace the numeric hero ids with hero names, one draft slot at a time.
# Values that change_hero_name cannot resolve become None.
for hero_column in columns:
    data[hero_column] = data[hero_column].apply(change_hero_name)
data.head(5)
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| match_id | radiant_win | duration | 1is_pick | 1team_id | 1hero_id | 2is_pick | 2team_id | 2hero_id | 3is_pick | ... | 17hero_id | 18is_pick | 18team_id | 18hero_id | 19is_pick | 19team_id | 19hero_id | 20is_pick | 20team_id | 20hero_id | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1928898739 | True | 2885 | False | 0 | Alchemist | False | 1 | Doom | False | ... | Faceless Void | False | 0.0 | Chaos Knight | True | 1.0 | Techies | True | 0.0 | Medusa |
| 1 | 1928932285 | True | 1894 | False | 0 | Doom | False | 1 | Abaddon | False | ... | Venomancer | False | 0.0 | Slardar | True | 1.0 | Magnus | True | 0.0 | Bounty Hunter |
| 2 | 1928903165 | True | 2424 | False | 1 | Undying | False | 0 | Ancient Apparition | False | ... | Witch Doctor | False | 1.0 | Viper | True | 0.0 | Enchantress | True | 1.0 | Enigma |
| 3 | 1928907204 | True | 2588 | False | 1 | Spirit Breaker | False | 0 | Alchemist | False | ... | Witch Doctor | False | 1.0 | Ancient Apparition | True | 0.0 | Dazzle | True | 1.0 | Windranger |
| 4 | 1928865084 | True | 3658 | False | 1 | Phantom Lancer | False | 0 | Wraith King | False | ... | Slark | False | 1.0 | Spectre | True | 0.0 | Gyrocopter | True | 1.0 | Venomancer |
5 rows × 63 columns
Getting the longest match from the dataset (in minutes)
# Longest recorded duration, divided by 60 to express it in minutes.
max_minutes = data['duration'].max() / 60
max_minutes
161.26666666666668
# # Plotting the graph of match played in minutes
# Column-wise division; produces the same float minutes as a per-row lambda.
data['minutes'] = data['duration'] / 60
# Non-null row counts for every distinct duration value.
groupby_minute = data.groupby('minutes').count()
groupby_minute.index
Float64Index([0.03333333333333333, 0.26666666666666666, 0.36666666666666664,
0.5166666666666667, 0.55, 0.5833333333333334,
0.6333333333333333, 0.65, 0.8,
0.8166666666666667,
...
127.83333333333333, 129.05, 136.35,
138.05, 138.35, 140.86666666666667,
142.46666666666667, 143.2, 153.3,
161.26666666666668],
dtype='float64', name='minutes', length=5184)
# Histogram of match durations in minutes, one observation per match.
# The previous version passed groupby_minute.index, i.e. only the distinct
# duration values, so each duration was counted once regardless of how many
# matches lasted that long.
plt.hist(data['minutes'], histtype='bar', color='green')
(array([819., 967., 968., 967., 949., 444., 50., 11., 7., 2.]),
array([3.33333333e-02, 1.61566667e+01, 3.22800000e+01, 4.84033333e+01,
6.45266667e+01, 8.06500000e+01, 9.67733333e+01, 1.12896667e+02,
1.29020000e+02, 1.45143333e+02, 1.61266667e+02]),
<a list of 10 Patch objects>)

Getting the wins on both sides (Radiant / Dire)
As there should be no bias, let's see which side of the map has more wins, or whether the two sides are equal.
# Rows where the radiant_win flag is True.
radiant_won = data['radiant_win'] == True
base1 = data[radiant_won]
base1.shape[0]
233537
# Rows where the radiant_win flag is False.
radiant_lost = data['radiant_win'] == False
base2 = data[radiant_lost]
base2.shape[0]
227513
# # Validation
# The two subsets must add up to the full dataset (no row lost or counted twice).
total = base1.shape[0] + base2.shape[0]
print(total, len(data), sep='\n')
461050
461050
# # Plotting the graph for both sides, it seems they are nearly equal (no bias on the sides here)
# Bar labels and heights are kept in matching order: radiant first, then dire.
temp_a = ['radiant', 'dire']
temp_b = [base1.shape[0], base2.shape[0]]
plt.bar(temp_a, temp_b)
<BarContainer object of 2 artists>

Let's Analyze the Draft

Basic Draft Rules
- First team to get ban also gets last ban
- First team to get ban gets first pick
- Second team to get ban gets second and third pick consecutively
- Second team to get ban gets last pick
Getting the first ban heroes
# Number of matches per hero in draft slot 1.
ban_1 = data['1hero_id']
ban1_count = data.groupby(ban_1).size()
# Wide figure so the vertical hero-name tick labels stay readable.
plt.rcParams['figure.figsize'] = (22, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(ban1_count.index, ban1_count.values)
[<matplotlib.lines.Line2D at 0x7f630a86b7b8>]

From this figure, we can say that the most banned heroes in the first ban phase are Silencer, Undying and Doom.
Getting the second ban heroes
# Number of matches per hero in draft slot 2.
ban2_count = data.groupby('2hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(ban2_count.index, ban2_count.values)
[<matplotlib.lines.Line2D at 0x7f630a709e48>]

The second ban phase also shows that Silencer, Undying and Doom are the most banned heroes, same as that of first ban phase
Getting the third ban heroes
# Number of matches per hero in draft slot 3.
ban3_count = data.groupby('3hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(ban3_count.index, ban3_count.values)
[<matplotlib.lines.Line2D at 0x7f6309da1518>]

The third ban phase has a different pattern, which shows a rise of Anti-Mage, Bloodseeker, Doom, Earthshaker, Ogre Magi, Phantom Lancer, Spirit Breaker, Storm Spirit and Winter Wyvern, a total of 9 new heroes on ban
Getting the fourth ban heroes
# Number of matches per hero in draft slot 4.
ban4_count = data.groupby('4hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(ban4_count.index, ban4_count.values)
[<matplotlib.lines.Line2D at 0x7f6304f7a630>]

The fourth ban phase shows a similar pattern to the third ban phase, with the exception of 2 heroes, Queen of Pain and Riki, which are starting to rise towards the top
Getting bans from all phases (1, 2, 3, 4, 9, 10, 11, 12, 17, 18)
# Hero names in the same order as heroes_dict.
heroes_list = list(heroes_dict.values())
total_ban = 10
columns = ['1hero_id', '2hero_id', '3hero_id', '4hero_id', '9hero_id', '10hero_id', '11hero_id', '12hero_id', '17hero_id', '18hero_id']
# One row per hero, one column per ban slot.
all_data = pd.DataFrame(columns=columns, index=heroes_list)
# # Sorting alphabetically
# sort_index() returns a new frame; without the assignment the sort was lost.
all_data = all_data.sort_index()
# Per-slot counts are aligned on hero name; heroes absent from a slot stay NaN.
for ban_column in columns:
    all_data[ban_column] = data.groupby(data[ban_column]).size()
# # For this context, null value is the 0 value (i.e hero is not picked on it's respective phase)
# fillna() also returns a new frame, so keep the result and then display it.
all_data = all_data.fillna(value=0)
all_data
From the above table, we can see that 4 heroes (Monkey King, Pangolier, Dark Willow and Grimstroke) have 0 bans. This is because the heroes API has since been updated on the Steam client, so it returns these newer heroes even though they were not part of the old meta.
# # Grouping the heroes for plotting purpose
# Row-wise total: bans per hero summed over every ban-slot column.
sum_all_data = all_data.sum(axis='columns')
sum_all_data
Anti-Mage 199406.0
Axe 52844.0
Bane 6910.0
Bloodseeker 134096.0
Crystal Maiden 23970.0
Drow Ranger 11871.0
Earthshaker 132579.0
Juggernaut 41749.0
Mirana 8839.0
Shadow Fiend 111092.0
Morphling 1401.0
Phantom Lancer 244466.0
Puck 9647.0
Pudge 22237.0
Razor 14361.0
Sand King 5403.0
Storm Spirit 108197.0
Sven 42068.0
Tiny 17019.0
Vengeful Spirit 3786.0
Windranger 57866.0
Zeus 54209.0
Kunkka 2468.0
Lina 49600.0
Lich 23307.0
Lion 31229.0
Shadow Shaman 13323.0
Slardar 55222.0
Tidehunter 68210.0
Witch Doctor 71820.0
...
Nyx Assassin 6991.0
Naga Siren 10244.0
Keeper of the Light 2856.0
Io 16178.0
Visage 1393.0
Slark 55320.0
Medusa 38105.0
Troll Warlord 12735.0
Centaur Warrunner 8064.0
Magnus 46969.0
Timbersaw 4686.0
Bristleback 71463.0
Tusk 63891.0
Skywrath Mage 6864.0
Abaddon 7679.0
Elder Titan 774.0
Legion Commander 31394.0
Ember Spirit 43802.0
Earth Spirit 11.0
Terrorblade 26.0
Phoenix 8912.0
Oracle 1.0
Techies 16057.0
Winter Wyvern 64459.0
Arc Warden 17.0
Underlord 9.0
Monkey King 0.0
Pangolier 0.0
Dark Willow 0.0
Grimstroke 0.0
Length: 116, dtype: float64
# # Plotting all the ban values
plt.grid()
plt.plot(sum_all_data.index, sum_all_data.values)
plt.xticks(rotation='vertical')
# Y ticks every 50000, from 0 up to and including 300000.
plt.yticks(np.arange(7) * 50000)
([<matplotlib.axis.YTick at 0x7f6304e4e5f8>,
<matplotlib.axis.YTick at 0x7f6304e4e1d0>,
<matplotlib.axis.YTick at 0x7f6304e5f278>,
<matplotlib.axis.YTick at 0x7f6304cefda0>,
<matplotlib.axis.YTick at 0x7f6304cf7320>,
<matplotlib.axis.YTick at 0x7f6304cf7860>,
<matplotlib.axis.YTick at 0x7f6304cf7da0>],
<a list of 7 Text yticklabel objects>)

The above graph shows the most banned heroes across all 10 ban phases. Most of them show a good distribution, except for 3 heroes (Phantom Lancer, Silencer and Undying)
Similarly, Getting the first pick heroes
# Number of matches per hero in draft slot 5.
pick1_count = data.groupby('5hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(pick1_count.index, pick1_count.values)
[<matplotlib.lines.Line2D at 0x7f6304c41860>]

This figure shows that picks are not as skewed as bans, as there are many options of heroes to pick here. These heroes include Crystal Maiden, Dazzle, Earthshaker, Undying and Witch Doctor. It also shows that Undying is on both graphs (pick and ban), which means that Undying was really popular in the meta
Getting the second picked heroes
# Number of matches per hero in draft slot 6.
pick2_count = data.groupby('6hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(pick2_count.index, pick2_count.values)
[<matplotlib.lines.Line2D at 0x7f6304acd710>]

Similar to the above graph, but with the exception of heroes like Lion and Omniknight.
Getting the third pick heroes
# Number of matches per hero in draft slot 7.
pick3_count = data.groupby('7hero_id').size()
plt.rcParams['figure.figsize'] = (20, 6)
plt.xticks(rotation='vertical')
plt.grid()
plt.plot(pick3_count.index, pick3_count.values)
[<matplotlib.lines.Line2D at 0x7f630495b438>]

As the sixth and seventh picks always belong to the same team, we can conclude that the heroes of this graph and the one above can be paired in a game: heroes like Crystal Maiden, Dazzle, Disruptor, Earthshaker, Lina, Lion, Ogre Magi, Omniknight, Shadow Shaman, Tusk, Undying and Witch Doctor
Getting graph for the most pick
total_pick = 10
columns = ['5hero_id', '6hero_id', '7hero_id', '8hero_id', '13hero_id', '14hero_id', '15hero_id', '16hero_id', '19hero_id', '20hero_id']
# One row per hero, one column per pick slot.
pick_all_data = pd.DataFrame(columns=columns, index=heroes_list)
# Per-slot counts are aligned on hero name; heroes absent from a slot stay NaN.
for pick_column in columns:
    pick_all_data[pick_column] = data.groupby(data[pick_column]).size()
# # As in our context, we fill Null values with 0 (i.e, not picked = 0)
# fillna() returns a new frame; the result was previously thrown away.
pick_all_data = pick_all_data.fillna(0)
sum_all_pick_data = pick_all_data.sum(axis=1)
sum_all_pick_data.head(2)
Anti-Mage 62255.0
Axe 60921.0
dtype: float64
# Total picks per hero across all pick slots.
plt.grid()
plt.plot(sum_all_pick_data.index, sum_all_pick_data.values)
plt.xticks(rotation='vertical')
# Y ticks every 50000, from 0 up to and including 250000.
plt.yticks(np.arange(6) * 50000)
([<matplotlib.axis.YTick at 0x7f6304761438>,
<matplotlib.axis.YTick at 0x7f6304819e48>,
<matplotlib.axis.YTick at 0x7f63047a4b38>,
<matplotlib.axis.YTick at 0x7f63046c6d68>,
<matplotlib.axis.YTick at 0x7f63046c0a58>,
<matplotlib.axis.YTick at 0x7f63047106d8>],
<a list of 6 Text yticklabel objects>)

Although the ban phase had a really uneven distribution, the pick distribution was much better for the meta than the ban graph suggests, which means that all the heroes got playtime in the game
Getting the most popular hero pairings on the basis of winrate
We can take win rates for 1st picked team. (i.e: 5, 8, 14, 16, 19)
We can take win rates for 2nd picked team. (i.e: 6, 7, 13, 15, 20)
# '1team_id' is the team that owns draft slot 1.
# NOTE(review): the masks below treat team id 0 as Radiant and 1 as Dire — confirm against the data source.
first_slot_team = data['1team_id']
radiant_won = data['radiant_win'] == True
dire_won = data['radiant_win'] == False

# Matches won by the team that owns draft slot 1.
all_win_data_only_1 = pd.concat([
    data.loc[(first_slot_team == 0) & radiant_won],
    data.loc[(first_slot_team == 1) & dire_won],
])
# Matches won by the other team.
all_win_data_only_2 = pd.concat([
    data.loc[(first_slot_team == 0) & dire_won],
    data.loc[(first_slot_team == 1) & radiant_won],
])
# # Validation
# Both subsets together must cover every match exactly once.
for winners in (all_win_data_only_1, all_win_data_only_2):
    print(winners.shape)
print(all_win_data_only_1.shape[0] + all_win_data_only_2.shape[0])
print(data.shape[0])
(233738, 64)
(227312, 64)
461050
461050
# sort_index() returns a new frame; assign it back so the rows really are
# restored to index order after the concat above.
all_win_data_only_1 = all_win_data_only_1.sort_index()
all_win_data_only_2 = all_win_data_only_2.sort_index()
# # Getting the hero with the most wins
total_pick_heroes = 10
# The first five slots are looked up in all_win_data_only_1, the last five in all_win_data_only_2.
pick_heroes_range = [5, 8, 14, 16, 19, 6, 7, 13, 15, 20]
change_count = 5
# hero name -> wins accumulated over every slot where that hero is the top winner
most_picked_hero_win_list = {}
for position, pick_slot in enumerate(pick_heroes_range):
    hero_id = '{}hero_id'.format(pick_slot)
    winners = all_win_data_only_1 if position < change_count else all_win_data_only_2
    # Compute the ranking once and read both the hero and its count from it.
    wins_per_hero = winners.groupby([hero_id]).size().sort_values(ascending=False)
    most_win_hero = wins_per_hero.index[0]
    # .iloc is explicit positional access; plain [0] on a label-indexed
    # Series is deprecated in recent pandas.
    most_win_hero_stats = wins_per_hero.iloc[0]
    most_picked_hero_win_list[most_win_hero] = (
        most_picked_hero_win_list.get(most_win_hero, 0) + most_win_hero_stats
    )
# # Total games
# Number of rows in the dataset.
print('Total Games: ', len(data))
Total Games: 461050
most_picked_hero_win_list
{'Earthshaker': 71882,
'Bristleback': 13658,
'Gyrocopter': 14346,
'Windranger': 14129}
# # Validation
# The accumulated wins can never exceed the number of games. Computed from
# the live values rather than from numbers copied out of an earlier run.
sum(most_picked_hero_win_list.values()) < data.shape[0]
True
Getting the most winning pairs of 5
# # We can take win rates for 1st picked team. (i.e: 5, 8, 14, 16, 19)
# # We can take win rates for 2nd picked team. (i.e: 6, 7, 13, 15, 20)
first_pick_slots = ['5hero_id', '8hero_id', '14hero_id', '16hero_id', '19hero_id']
second_pick_slots = ['6hero_id', '7hero_id', '13hero_id', '15hero_id', '20hero_id']
# Wins per full five-hero line-up, keyed by the heroes in slot order.
grouped_win_1 = all_win_data_only_1.groupby(first_pick_slots).size()
grouped_win_2 = all_win_data_only_2.groupby(second_pick_slots).size()
temp_groups = [grouped_win_1, grouped_win_2]
winning_combination_5 = pd.concat(temp_groups)
# Merge identical line-ups coming from both groups, most frequent first.
merged_line_ups = winning_combination_5.groupby(winning_combination_5.index).sum()
winning_combination_5 = merged_line_ups.sort_values(ascending=False)
(Shadow Fiend, Winter Wyvern, Gyrocopter, Rubick, Ember Spirit) 3
(Earthshaker, Witch Doctor, Spirit Breaker, Anti-Mage, Sven) 3
(Lion, Warlock, Shadow Fiend, Windranger, Invoker) 3
dtype: int64
# The Series is sorted in descending order, so the first element is the
# maximum. .iloc makes the positional access explicit; plain [0] on a
# label-indexed Series is deprecated in recent pandas.
max_value = winning_combination_5.iloc[0]
# Every line-up tied for the top count.
winning_combination_5.loc[winning_combination_5.values == max_value]
(Shadow Fiend, Winter Wyvern, Gyrocopter, Rubick, Ember Spirit) 3
(Earthshaker, Witch Doctor, Spirit Breaker, Anti-Mage, Sven) 3
(Lion, Warlock, Shadow Fiend, Windranger, Invoker) 3
(Earthshaker, Shadow Fiend, Dark Seer, Ember Spirit, Windranger) 3
(Witch Doctor, Crystal Maiden, Bristleback, Sven, Queen of Pain) 3
(Ancient Apparition, Lion, Queen of Pain, Phantom Lancer, Ember Spirit) 3
(Witch Doctor, Spirit Breaker, Io, Tiny, Magnus) 3
(Witch Doctor, Tidehunter, Enigma, Sven, Shadow Fiend) 3
(Winter Wyvern, Earthshaker, Clockwerk, Windranger, Ember Spirit) 3
(Shadow Fiend, Earthshaker, Clockwerk, Ancient Apparition, Ember Spirit) 3
(Shadow Fiend, Earthshaker, Gyrocopter, Clockwerk, Bounty Hunter) 3
(Winter Wyvern, Earthshaker, Dark Seer, Shadow Fiend, Windranger) 3
(Omniknight, Crystal Maiden, Bristleback, Phantom Assassin, Viper) 3
(Omniknight, Phantom Assassin, Shadow Shaman, Wraith King, Anti-Mage) 3
(Crystal Maiden, Enigma, Magnus, Sven, Shadow Fiend) 3
(Crystal Maiden, Vengeful Spirit, Luna, Drow Ranger, Sven) 3
(Earthshaker, Dazzle, Bristleback, Shadow Fiend, Faceless Void) 3
dtype: int64
# # Validating
# Distinct line-ups must be fewer than every ordered choice of 5 out of 118.
# NOTE(review): 118 looks like an assumed upper bound on the hero count — confirm.
winning_combination_5.shape[0] < 118 ** 5
True
These hero combinations had the highest win count (i.e. 3). Out of roughly four hundred thousand matches, only 17 combinations reach that maximum, and the maximum is only 3 wins. This shows that picking the heroes with the best win rate doesn't necessarily win the match; there should be a good combination of all the heroes on the same team
Getting the most winning pairs of 4
# # Validation
# # C = Combination
# # 5C4 = 5
# # We can take win rates for 1st picked team. (i.e: 5, 8, 14, 16, 19)
# # We can take win rates for 2nd picked team. (i.e: 6, 7, 13, 15, 20)
from itertools import combinations

first_pick_slots = ['5hero_id', '8hero_id', '14hero_id', '16hero_id', '19hero_id']
second_pick_slots = ['6hero_id', '7hero_id', '13hero_id', '15hero_id', '20hero_id']

# Generate every 4-out-of-5 slot subset instead of spelling each one out by
# hand; hand-written lists are easy to get wrong by copy-paste.
temp_groups = []
for winners, team_slots in ((all_win_data_only_1, first_pick_slots),
                            (all_win_data_only_2, second_pick_slots)):
    for slots in combinations(team_slots, 4):
        temp_groups.append(winners.groupby(list(slots)).size())
winning_combination_4 = pd.concat(temp_groups)
# Merge identical hero tuples from the different slot subsets, most frequent first.
winning_combination_4 = winning_combination_4.groupby(winning_combination_4.index).sum().sort_values(ascending=False)
# .iloc is explicit positional access; plain [0] on a label-indexed Series
# is deprecated in recent pandas.
max_value = winning_combination_4.iloc[0]
winning_combination_4.loc[winning_combination_4.values == max_value]
(Crystal Maiden, Vengeful Spirit, Luna, Drow Ranger) 27
dtype: int64
# # Validating
# Distinct 4-hero tuples must be fewer than every ordered choice of 4 out of 118.
winning_combination_4.shape[0] < 118 ** 4
True
Getting the most winning pair of 3
# # We can take win rates for 1st picked team. (i.e: 5, 8, 14, 16, 19)
# # We can take win rates for 2nd picked team. (i.e: 6, 7, 13, 15, 20)
from itertools import combinations

first_pick_slots = ['5hero_id', '8hero_id', '14hero_id', '16hero_id', '19hero_id']
second_pick_slots = ['6hero_id', '7hero_id', '13hero_id', '15hero_id', '20hero_id']

# All ten 3-out-of-5 slot subsets per team, generated rather than hand-listed.
temp_groups = []
for winners, team_slots in ((all_win_data_only_1, first_pick_slots),
                            (all_win_data_only_2, second_pick_slots)):
    for slots in combinations(team_slots, 3):
        temp_groups.append(winners.groupby(list(slots)).size())
winning_combination_3 = pd.concat(temp_groups)
# Merge identical hero tuples from the different slot subsets, most frequent first.
winning_combination_3 = winning_combination_3.groupby(winning_combination_3.index).sum().sort_values(ascending=False)
# .iloc is explicit positional access; plain [0] on a label-indexed Series
# is deprecated in recent pandas.
max_value = winning_combination_3.iloc[0]
winning_combination_3.loc[winning_combination_3.values == max_value]
(Earthshaker, Witch Doctor, Windranger) 443
dtype: int64
# # Validating
# Distinct 3-hero tuples must be fewer than every ordered choice of 3 out of 118.
winning_combination_3.shape[0] < 118 ** 3
True
Getting the most winning pair of 2
# # We can take win rates for 1st picked team. (i.e: 5, 8, 14, 16, 19)
# # We can take win rates for 2nd picked team. (i.e: 6, 7, 13, 15, 20)
from itertools import combinations

first_pick_slots = ['5hero_id', '8hero_id', '14hero_id', '16hero_id', '19hero_id']
second_pick_slots = ['6hero_id', '7hero_id', '13hero_id', '15hero_id', '20hero_id']

# All ten 2-out-of-5 slot pairs per team. The hand-written list grouped
# slots (8, 19) twice and never grouped (5, 19), which double-counted one
# pairing and dropped another; generating the pairs removes that mistake.
temp_groups = []
for winners, team_slots in ((all_win_data_only_1, first_pick_slots),
                            (all_win_data_only_2, second_pick_slots)):
    for slots in combinations(team_slots, 2):
        temp_groups.append(winners.groupby(list(slots)).size())
winning_combination_2 = pd.concat(temp_groups)
# Merge identical hero pairs from the different slot pairs, most frequent first.
winning_combination_2 = winning_combination_2.groupby(winning_combination_2.index).sum().sort_values(ascending=False)
# .iloc is explicit positional access; plain [0] on a label-indexed Series
# is deprecated in recent pandas.
max_value = winning_combination_2.iloc[0]
winning_combination_2.loc[winning_combination_2.values == max_value]
(Earthshaker, Gyrocopter) 8309
dtype: int64
# # Validating
# Distinct hero pairs must be fewer than every ordered choice of 2 out of 118.
winning_combination_2.shape[0] < 118 ** 2
True
From a Gamer's Perspective
Dota totally depends on the meta. This draft data comes from 6.* whereas the current meta is on 7.*, so this draft analysis would not hold up if used on real games today, because of the meta changes.
Notebook Created by: Sulabh Shrestha
- Connect with me on Linkedin & Twitter
Contribute
- Issue Tracker: https://github.com/codexponent/dota2-draft-analysis/issues
- Source Code: https://github.com/codexponent/dota2-draft-analysis
- Contributors: https://github.com/codexponent/dota2-draft-analysis/contributors.txt
Support
If you are having issues, please let us know. I have a mailing list located at: tsulabh4@gmail.com
Copyright and License
Copyright 2018 Codexponent. Code released under the [MIT] license.
Built With
- jupyter-notebook
Log in or sign up for Devpost to join the conversation.