Skip to content
This repository was archived by the owner on Jul 15, 2023. It is now read-only.
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions explore_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,34 @@ def find_channel_grouping_revenue(dataset):

return counts, means


def find_transaction_by_region(data):
    """ Find the average transaction revenue by region

    args:
        data (Dataset): the google analytics Dataset; only its ``train``
            DataFrame is read, and it is not modified.

    returns:
        Dataframe indexed by 'geoNetwork.region' with a single
        'Transaction' column holding the mean transaction revenue of that
        region (missing revenue counts as 0). Regions whose mean is not
        positive are left out. Rows are in ascending order of 'Transaction'.

    """
    # NOTE(review): a reviewer asked whether this function is used anywhere;
    # confirm there is a caller.
    train_df = data.train

    # Work on the revenue Series directly so the training frame never needs
    # to be copied or given a scratch column.
    revenue = train_df['totals.transactionRevenue'].fillna(0).astype(float)
    avg = revenue.groupby(train_df['geoNetwork.region']).mean()

    # Keep only regions that actually produced revenue.
    avg = avg[avg > 0]
    return avg.to_frame('Transaction').sort_values(by=['Transaction'])

def _single_visit_percentages(train_df):
    """ Compute the percentage of first visits for each continent

    args:
        train_df (DataFrame): frame with 'geoNetwork.continent' and
            'visitNumber' columns.

    returns:
        dict mapping each continent, in order of first appearance, to the
        percentage of its non-null 'visitNumber' entries that equal 1.
        Rows with a missing continent are skipped.

    """
    percentages = {}
    # sort=False keeps continents in order of first appearance, matching the
    # order of Series.unique().
    grouped = train_df.groupby('geoNetwork.continent', sort=False)['visitNumber']
    for continent, visits in grouped:
        number_of_people = visits.count()
        number_of_single_visits = (visits == 1).sum()
        percentages[continent] = (number_of_single_visits / number_of_people) * 100
    return percentages


def find_percentage_single_transaction(dataset):
    """ Find the percentage of single visits for each continent

    args:
        dataset (Dataset): the google analytics Dataset; only its ``train``
            DataFrame is read, and it is not modified.

    returns:
        String with one newline-terminated line per continent, each giving
        the percentage (two decimals) of visits whose 'visitNumber' is 1.

    """
    # NOTE(review): a reviewer suggested returning a value and leaving the
    # printing to explore.py. The string return is kept so existing callers
    # keep working; the numeric values come from _single_visit_percentages.
    percentages = _single_visit_percentages(dataset.train)
    lines = [
        'Percentage of people visited for once in ' + con + ': '
        + "%.2f" % percentage + '\n'
        for con, percentage in percentages.items()
    ]
    return ''.join(lines)