|
|
@@ -8,7 +8,8 @@
|
|
|
"source": [
|
|
|
"import pandas as pd\n",
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
- "import numpy as np"
|
|
|
+ "import numpy as np\n",
|
|
|
+ "import os"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
@@ -17,8 +18,12 @@
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
+ "# Access paths\n",
|
|
|
+ "DATA_PATH = './data'\n",
|
|
|
+ "btz_path = os.path.join(DATA_PATH, 'bretzel.csv')\n",
|
|
|
+ "\n",
|
|
|
"# You need to use the parser before to get the csv file.\n",
|
|
|
- "my_data = pd.read_csv('bretzel.csv', index_col='date', dtype=str)\n",
|
|
|
+ "my_data = pd.read_csv(btz_path, index_col='date', dtype=str)\n",
|
|
|
"my_data = my_data.replace(to_replace=[np.NaN], value='')"
|
|
|
]
|
|
|
},
|
|
|
@@ -84,11 +89,11 @@
|
|
|
"source": [
|
|
|
"def message_length(data, mode='moving', freq='365D'):\n",
|
|
|
" text_message = data[['sender', 'sender_id', 'msg_text']]\n",
|
|
|
- " text_message.sort_index(inplace=True)\n",
|
|
|
+ " text_message = text_message.sort_index()\n",
|
|
|
" for name in btz_core_id:\n",
|
|
|
- " idx = text_message['sender_id']==btz_core_id.get(name)\n",
|
|
|
+ " idx = text_message['sender_id'] == btz_core_id.get(name)\n",
|
|
|
" time_range = pd.to_datetime(data.index[idx], yearfirst=True).to_series().dt.date\n",
|
|
|
- " user_messages = text_message[idx]\n",
|
|
|
+ " user_messages = text_message.loc[idx]\n",
|
|
|
" user_messages_length = user_messages['msg_text'].str.len()\n",
|
|
|
" user_messages_length_no_spaces = user_messages['msg_text'].str.replace(' ', '').str.len()\n",
|
|
|
" print(f\"{name}, {user_messages_length.mean():.2f}, {user_messages_length_no_spaces.mean():.2f}\")\n",
|