Skip to content
This repository was archived by the owner on Jul 15, 2023. It is now read-only.
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions explore_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,14 @@ def find_channel_grouping_revenue(dataset):
means = {grouping: np.mean(df[df['channelGrouping'] == grouping]['totals.transactionRevenue'].astype('int64')) / 10000 for grouping in groupings}

return counts, means

def find_percentage_single_transaction(dataset):
    """Report, per continent, the share of visits that are a first visit.

    For every distinct value of ``geoNetwork.continent`` in ``dataset.train``
    this counts the rows with a non-null ``visitNumber`` and the rows whose
    ``visitNumber`` equals 1, then prints and records the ratio as a
    percentage.

    Args:
        dataset: Object exposing a ``train`` pandas DataFrame that has the
            columns ``geoNetwork.continent`` and ``visitNumber``.

    Returns:
        dict mapping each continent (in order of first appearance) to the
        percentage of its rows whose ``visitNumber`` is 1. The same figures
        are still printed, so existing callers see unchanged output; new
        callers can use the returned mapping and print it themselves.
    """
    # Read-only access: the previous version re-indexed ``dataset.train`` in
    # place, which altered the caller's frame and made a second call fail
    # because the 'fullVisitorId' column no longer existed. The index was
    # never used by the computation, so it is simply not set any more.
    train_df = dataset.train

    percentages = {}
    # ``unique()`` also yields missing values; drop them first because a
    # missing continent matches no row and cannot be put into the message.
    continents = train_df['geoNetwork.continent'].dropna().unique()
    for con in continents:
        # Filter once per continent and reuse the selection for both counts.
        visits = train_df.loc[
            train_df['geoNetwork.continent'] == con, 'visitNumber'
        ]
        number_of_people = visits.count()
        if not number_of_people:
            # Every visitNumber is missing for this continent; no ratio.
            continue
        number_of_single_visits = visits[visits == 1].count()
        percentage = (number_of_single_visits / number_of_people) * 100
        percentages[con] = percentage
        print(f'Percentage of people visited for once in {con}: {percentage}')

    return percentages

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of printing your results here, please call this function from explore.py and print the results there.