Browse Source

chemin d'accès pour données

arnaud 2 years ago
parent
commit
11b8ef6bee
1 changed file with 10 additions and 5 deletions
  1. 10 5
      recap.ipynb

+ 10 - 5
recap.ipynb

@@ -8,7 +8,8 @@
    "source": [
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
-    "import numpy as np"
+    "import numpy as np\n",
+    "import os"
    ]
   },
   {
@@ -17,8 +18,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# Access paths\n",
+    "DATA_PATH = './data'\n",
+    "btz_path = os.path.join(DATA_PATH, 'bretzel.csv')\n",
+    "\n",
     "# You need to use the parser before to get the csv file.\n",
-    "my_data = pd.read_csv('bretzel.csv', index_col='date', dtype=str)\n",
+    "my_data = pd.read_csv(btz_path, index_col='date', dtype=str)\n",
     "my_data = my_data.replace(to_replace=[np.NaN], value='')"
    ]
   },
@@ -84,11 +89,11 @@
    "source": [
     "def message_length(data, mode='moving', freq='365D'):\n",
     "    text_message = data[['sender', 'sender_id', 'msg_text']]\n",
-    "    text_message.sort_index(inplace=True)\n",
+    "    text_message = text_message.sort_index()\n",
     "    for name in btz_core_id:\n",
-    "        idx = text_message['sender_id']==btz_core_id.get(name)\n",
+    "        idx = text_message['sender_id'] == btz_core_id.get(name)\n",
     "        time_range = pd.to_datetime(data.index[idx], yearfirst=True).to_series().dt.date\n",
-    "        user_messages = text_message[idx]\n",
+    "        user_messages = text_message.loc[idx]\n",
     "        user_messages_length = user_messages['msg_text'].str.len()\n",
     "        user_messages_length_no_spaces = user_messages['msg_text'].str.replace(' ', '').str.len()\n",
     "        print(f\"{name}, {user_messages_length.mean():.2f}, {user_messages_length_no_spaces.mean():.2f}\")\n",