|
|
@@ -2,13 +2,15 @@
|
|
|
"cells": [
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 7,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"import pandas as pd\n",
|
|
|
"import numpy as np\n",
|
|
|
"import json\n",
|
|
|
+ "import matplotlib.pyplot as plt\n",
|
|
|
+ "from sklearn.linear_model import LinearRegression \n",
|
|
|
"\n",
|
|
|
"pd.options.display.max_columns = 2000\n",
|
|
|
"pd.options.display.max_rows = 2000\n",
|
|
|
@@ -34,7 +36,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 2,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
@@ -46,7 +48,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 11,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
@@ -58,24 +60,67 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 34,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "data": {
|
|
|
- "text/plain": [
|
|
|
- "text 220\n",
|
|
|
- "Name: 1337, dtype: int64"
|
|
|
- ]
|
|
|
- },
|
|
|
- "execution_count": 34,
|
|
|
- "metadata": {},
|
|
|
- "output_type": "execute_result"
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"words_freq.loc['1337']"
|
|
|
]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "attachments": {},
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "Messages cumulés sur la durée par membre."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "hist_data = data[['date','from', 'type']].copy()\n",
|
|
|
+ "hist_data['date'] = pd.to_datetime(hist_data['date'], yearfirst=True).dt.date"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "print(data['from'].unique())\n",
|
|
|
+ "btz_core = ['Nathan Spaeter', 'Luyzon', 'Clément Krebs', 't o', 'Leous', 'Arnaud']\n",
|
|
|
+ "btz_ext = btz_core + ['Senkei', 'Éléonore', 'Tozpa', 'Léo', 'XxX_MatthieuXPlume_XxX', 'poline', 'Sarah Guillemant']"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Cumulative number of messages over time, one curve per core member.\n",
|
|
|
+ "for name in btz_core:\n",
|
|
|
+ "    idx = hist_data['from'] == name\n",
|
|
|
+ "    time_range = pd.to_datetime(hist_data.loc[idx, 'date'], yearfirst=True).dt.date\n",
|
|
|
+ "    cumulative_count = np.arange(1, time_range.shape[0] + 1)  # 1-based: the first message counts as 1\n",
|
|
|
+ "    plt.plot(time_range, cumulative_count, label=name)\n",
|
|
|
+ "plt.ylabel('Messages cumulés')\n",
|
|
|
+ "plt.legend()\n",
|
|
|
+ "plt.show()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "hist_data.groupby(by=['date', 'from']).size()"
|
|
|
+ ]
|
|
|
}
|
|
|
],
|
|
|
"metadata": {
|
|
|
@@ -94,7 +139,7 @@
|
|
|
"name": "python",
|
|
|
"nbconvert_exporter": "python",
|
|
|
"pygments_lexer": "ipython3",
|
|
|
- "version": "3.10.9"
|
|
|
+ "version": "3.10.9 (main, Dec 19 2022, 17:35:49) [GCC 12.2.0]"
|
|
|
},
|
|
|
"orig_nbformat": 4,
|
|
|
"vscode": {
|