File size: 26,290 Bytes
121a1b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from os import listdir\n",
    "from os.path import isfile, join\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set path and load data\n",
    "path = '/Users/lsacy/data/HOPE_WSDM_2022/Train/'\n",
    "path2 = '/Users/lsacy/data/HOPE_WSDM_2022/Test/'\n",
    "path3 = '/Users/lsacy/data/HOPE_WSDM_2022/Validation/'\n",
    "\n",
    "\n",
    "files = [f for f in listdir(path) if isfile(join(path, f))]\n",
    "files2 = [f for f in listdir(path2) if isfile(join(path2, f))]\n",
    "files3 = [f for f in listdir(path3) if isfile(join(path3, f))]\n",
    "\n",
    "df_list = []\n",
    "for file in files:\n",
    "    df = pd.read_csv(path+file)\n",
    "    df_list.append(df)\n",
    "\n",
    "for file in files2:\n",
    "    df = pd.read_csv(path2+file)\n",
    "    df_list.append(df)\n",
    "\n",
    "for file in files3:\n",
    "    df= pd.read_csv(path3+file)\n",
    "    df_list.append(df)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [],
   "source": [
    "# do basic cleaning (removing NaNs, etc.)\n",
    "\n",
    "# remove 'Unnamed: 0' columns\n",
    "for df in df_list:\n",
    "    try: \n",
    "        df.drop(['Unnamed: 0'], axis=1, inplace=True)\n",
    "    except:\n",
    "        pass\n",
    "\n",
    "for df in df_list:\n",
    "    try: \n",
    "        df.drop(['Unnamed: 0.1'], axis=1, inplace=True)\n",
    "    except:\n",
    "        pass\n",
    "\n",
    "for df in df_list:\n",
    "    try: \n",
    "        df.drop(['Unnamed: 0.1.1'], axis=1, inplace=True)\n",
    "    except:\n",
    "        pass\n",
    "\n",
    "# find 'Type' == NaN\n",
    "for i in range(len(df_list)):\n",
    "    if df_list[i]['Type'].isnull().any():\n",
    "        print(i)\n",
    "        \n",
    "# show the rows that have a NaN value in 'Type'\n",
    "for i in range(len(df_list)):\n",
    "    if df_list[i]['Type'].isnull().any():\n",
    "        print(df_list[i][df_list[i]['Type'].isnull()])\n",
    "\n",
    "# Set the type of the row with NaN value to 'T'\n",
    "for i in range(len(df_list)):\n",
    "    if df_list[i]['Type'].isnull().any():\n",
    "        df_list[i].loc[df_list[i]['Type'].isnull(), 'Type'] = 'T'\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {},
   "outputs": [],
   "source": [
    "# to get the data into prompt-completion format, we need to check for\n",
    "# continuity of the conversation by comparing 'Type' with 'Type'.shift(1), and print i for every dataframe that breaks it\n",
    "for i in range(len(df_list)):\n",
    "    # check whether the 'Type' values in df_list[i] alternate between T and P on every row\n",
    "    # if they do not, print i (the position of the dataframe in df_list)\n",
    "    if (df_list[i]['Type'] == df_list[i]['Type'].shift(1)).any():\n",
    "        print(i)\n",
    "\n",
    "# add a column 'key' to each df in df_list: a counter that increases whenever 'Type' changes, so consecutive rows where 'Type' does not switch between T and P share the same key\n",
    "for i in range(len(df_list)):\n",
    "    df_list[i]['key'] = (df_list[i]['Type'] != df_list[i]['Type'].shift(1)).astype(int).cumsum()\n",
    "\n",
    "# create a new df_list2, only keep the columns ['key', 'Type', 'Utterance']\n",
    "df_list2 = []\n",
    "for i in range(len(df_list)):\n",
    "    df_temp = df_list[i][['key', 'Type', 'Utterance']]\n",
    "    df_list2.append(df_temp)\n",
    "    \n",
    "# apply groupby to each df in df_list to get the data in prompt - completion format\n",
    "for i in range(len(df_list2)):\n",
    "    df_list2[i] = df_list2[i].groupby(['key', 'Type'])['Utterance'].apply(' '.join)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 233,
   "metadata": {},
   "outputs": [],
   "source": [
    "# convert series to frame\n",
    "for i in range(len(df_list2)):\n",
    "    df_list2[i] = df_list2[i].to_frame()\n",
    "# reset the index so that 'key' and 'Type' become regular columns again\n",
    "for i in range(len(df_list2)):\n",
    "    df_list2[i].reset_index(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 270,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check again for rows where 'Type' fails to switch (consecutive rows from the same speaker)\n",
    "for i in range(len(df_list2)):\n",
    "    # check whether the 'Type' values in df_list2[i] alternate between T and P on every row\n",
    "    # if they do not, print i (the position of the dataframe in df_list2)\n",
    "    if (df_list2[i]['Type'] == df_list2[i]['Type'].shift(1)).any():\n",
    "        print(i)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save list of patient start index\n",
    "patient_start=[]\n",
    "for i, df in enumerate(df_list2):\n",
    "    if df.iloc[0]['Type'] == 'P':\n",
    "        patient_start.append(i)\n",
    "\n",
    "# save list of therapist start index\n",
    "therapist_start = [i for i in range(len(df_list2)) if i not in patient_start]\n",
    "\n",
    "# create a new temporary dataframe with 2 columns: prompt and completion\n",
    "df_temp = pd.DataFrame(columns=['prompt', 'completion'])\n",
    "\n",
    "# create a list of dataframes, each dataframe is a conversation\n",
    "df_list3 = []\n",
    "for i in therapist_start:\n",
    "    df_temp = pd.DataFrame(columns=['prompt', 'completion'])\n",
    "    for row in range(len(df_list2[i])):\n",
    "        if df_list2[i]['Type'][row] == 'P' and (len(df_list2[i]) - 1) >= row+1:\n",
    "            print(i, row)\n",
    "            df_temp = df_temp.append({'prompt': df_list2[i]['Utterance'][row], 'completion': df_list2[i]['Utterance'][row+1]}, ignore_index=True)\n",
    "    df_list3.append(df_temp)\n",
    "\n",
    "for i in patient_start:\n",
    "    df_temp = pd.DataFrame(columns=['prompt', 'completion'])\n",
    "    for row in range(len(df_list2[i])):\n",
    "        if df_list2[i]['Type'][row] == 'P' and (len(df_list2[i]) - 1) >= row+1:\n",
    "            print(i, row)\n",
    "            df_temp = df_temp.append({'prompt': df_list2[i]['Utterance'][row], 'completion': df_list2[i]['Utterance'][row+1]}, ignore_index=True)\n",
    "    df_list3.append(df_temp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 332,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Im here',\n",
       "       \"So I've been made to come in and talk to you. because something happened on the bus. And there I may have said something to somebody and they're just like, oh, you can't say that. So they actually said, we're going to take you to jail. from jail. They just called me here.\",\n",
       "       \"They didn't explain anything. The government never explains anything. They just don't want me to understand what's going on because they just want to keep me out of captivity.\",\n",
       "       \"nope, it's the entire government, entire government, higher government.\",\n",
       "       \"all my life. I've done this all my life. They they're always listening. They're always around. They're always there.\",\n",
       "       \"always there. Like, for example, they're in the lights right now. They're listening to everything that we're talking about. So that's the reason why I really can't talk about what happened on the bus because they're just going to use that against and then it's locked me up again. I think we take more minutes.\",\n",
       "       \"Yes, I take the medication, because if I don't take the medication, government comes and takes me away and puts me into a hospital where they force me the medication. And then the lights are always on and they can always hear everything that I'm saying and what I'm thinking and what I'm going to do.\",\n",
       "       \"wanting us to take over the world, to take over the world, people in the world, of course, they want to use everything and use everything that everybody knows to use it against them so that they can actually have the world they want in that baby and make everybody pretend that they don't know what's going on. So therefore, they're using everybody's thoughts against themselves, so that the government can use it to their benefit.\",\n",
       "       \"No. So when I'm home, I think I'm perfectly fine because I set up my house in my room to actually make it government proof so that nobody can actually hear see what's going on in there. So when I'm home, I'm perfectly fine.\",\n",
       "       \"being home and being home I've saved my mom saved my brother's save everybody safe. And being out in the world in the world. That's a nobody safe.\",\n",
       "       \"No, I told you they are employed, and they can hear everything that we're saying right now. So no, we're not safe here.\",\n",
       "       'at home and safe in my room, that I mean government.',\n",
       "       \"I would stay home all the time, but I can't stay home because I mean to go and see doctors and talk to people and do the things that the government needs me or wants me to do. But I don't want to do those things because she's in my thoughts and they're trying to use everybody and get sleep. I can't deal with that.\",\n",
       "       \"Everybody knows that they're in the light. It's common knowledge. It's common knowledge. Everybody knows that.\",\n",
       "       \"No, of course, everybody hears the lights talking to you. I don't know what you're talking about. Like everybody hears it. Sometimes it's a whisper. For some people. Sometimes it's a yell. And if you want to not pay attention to what the lights are saying to you, then I don't know what's wrong with you, but everybody can hear it.\",\n",
       "       'It sounds like a robot talking. It sounds like we need to know this information we need to what do you do.',\n",
       "       'Yes. where your notes gonna go?',\n",
       "       'who has access to the chart room.',\n",
       "       \"Can I get a copy of your notes after you're done?\",\n",
       "       \"Yes, because I might trust you. And I don't know if I do yet. But I don't trust the other people who work here because I don't know if they're working for the government, which they probably are working for the government. So they can actually have access to all of my notes and the things that you're writing down, which I don't know what they are, because you're not going to give me a copy of it yet. And I just don't trust that they can actually get that information and because they're going to use it to their advantage and they're going to give it to the government, which I can't have that. So could you please not take notes?\",\n",
       "       \"Well, because I made my room at home government proof, I think they're going to try to steal that technology when I actually create it so that they can then disrupt my room and then everybody else's rooms who are just like mine so that they can then use their mind control to then control me to do the things that they want me to do.\",\n",
       "       'Correct. Without my room, and without the technology and the systems and the things that I created, then the government will be able to control everything that I do.',\n",
       "       'I feel fine',\n",
       "       \"I don't really have any problem Yeah, I just I feel fine. I don't feel happy. I don't feel sad. I just, I'm just here\",\n",
       "       \"well, the government's gonna do what they're gonna do anyway. So it doesn't matter if I'm happy sad, pissed off angry. any of that the government's going to make me do what they want me to do anyway. So it doesn't matter how I feel.\",\n",
       "       \"No, because I've learned my lesson to not say those things out loud into to talk about what the government's doing because I can't actually inform anybody else because nobody's listening to me because They're already controlled by the government because they don't have a room like I do at home.\",\n",
       "       \"While people don't understand what's going on, then that's their fault. that's their problem. They should have listened to me. And I could actually gave them the technology that they needed to actually create the room so that the government can't hear them.\",\n",
       "       'No. Quite the opposite. It is completely opposite.',\n",
       "       \"Okay. Yeah, we live with my mom. She does that she's supposed to she doesn't go in my room because she knows that. She goes into the robots gonna mess up the technology so she stays out.\",\n",
       "       'I talk to people all the time, but they are in my head. We have conversations all the time about how we can actually make the world a better place and then how we can actually go through and then destroy the government or do good. Just pump it up.',\n",
       "       'that would be good', 'Three days works.'], dtype=object)"
      ]
     },
     "execution_count": 332,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_list3[110]['prompt'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 311,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_final = pd.DataFrame(columns=['prompt', 'completion'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/m_/_mwt902x1z595k65b7qf743m0000gn/T/ipykernel_82735/1421327753.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
      "  df_final = df_final.append(i, ignore_index=True)\n"
     ]
    }
   ],
   "source": [
    "for i in df_list3:\n",
    "    df_final = df_final.append(i, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 319,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>prompt</th>\n",
       "      <th>completion</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Good, good.</td>\n",
       "      <td>It's good to see you. Yeah.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Yeah, I mean, good week with the roommates. I ...</td>\n",
       "      <td>Silence. Terrific. So what was what specifical...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>is just telling her that like, my studies are ...</td>\n",
       "      <td>right. Right. Right. Because I think that's re...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>So I think being a little way direct was helpful.</td>\n",
       "      <td>It's really nice, I think to you've been able ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>At home for two days, and it was with my mom a...</td>\n",
       "      <td>okay. So she's still sixers. Did she have a bi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6439</th>\n",
       "      <td>I actually came across a program, which is per...</td>\n",
       "      <td>So the program that you're interested, you wan...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6440</th>\n",
       "      <td>I want you to see if, if, if I could choose th...</td>\n",
       "      <td>Okay, well looking at my schedule, it probably...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6441</th>\n",
       "      <td>Right? I will. My daughter is 17 years old, ok...</td>\n",
       "      <td>So you're looking, you're looking for a day pr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6442</th>\n",
       "      <td>Right. Right. And, I mean, I was looking for a...</td>\n",
       "      <td>You while you were there, it was difficult. Yo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6443</th>\n",
       "      <td>no, she did not she. She just there just wasn'...</td>\n",
       "      <td>And so so just there wasn't when when you visi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>6444 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 prompt  \\\n",
       "0                                          Good, good.    \n",
       "1     Yeah, I mean, good week with the roommates. I ...   \n",
       "2     is just telling her that like, my studies are ...   \n",
       "3     So I think being a little way direct was helpful.   \n",
       "4     At home for two days, and it was with my mom a...   \n",
       "...                                                 ...   \n",
       "6439  I actually came across a program, which is per...   \n",
       "6440  I want you to see if, if, if I could choose th...   \n",
       "6441  Right? I will. My daughter is 17 years old, ok...   \n",
       "6442  Right. Right. And, I mean, I was looking for a...   \n",
       "6443  no, she did not she. She just there just wasn'...   \n",
       "\n",
       "                                             completion  \n",
       "0                           It's good to see you. Yeah.  \n",
       "1     Silence. Terrific. So what was what specifical...  \n",
       "2     right. Right. Right. Because I think that's re...  \n",
       "3     It's really nice, I think to you've been able ...  \n",
       "4     okay. So she's still sixers. Did she have a bi...  \n",
       "...                                                 ...  \n",
       "6439  So the program that you're interested, you wan...  \n",
       "6440  Okay, well looking at my schedule, it probably...  \n",
       "6441  So you're looking, you're looking for a day pr...  \n",
       "6442  You while you were there, it was difficult. Yo...  \n",
       "6443  And so so just there wasn't when when you visi...  \n",
       "\n",
       "[6444 rows x 2 columns]"
      ]
     },
     "execution_count": 319,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 325,
   "metadata": {},
   "outputs": [],
   "source": [
    "output = []\n",
    "for i in df_final.values:\n",
    "    prompt = i[0].replace('’', '\\'').replace('“', '\\\"').replace('”', '\\\"').replace('‘', '\\'').replace('…', '...').replace('\\xa0', ' ').replace('\\n', ' ').replace('\\r', ' ').replace('\\t', ' ')\n",
    "    completion = i[1].replace('\\xa0', ' ').replace('’', '\\'').replace('“', '\\\"').replace('”', '\\\"').replace('‘', '\\'').replace('…', '...').replace('\\n', ' ').replace('\\r', ' ').replace('\\t', ' ')\n",
    "    line = {'prompt': prompt, 'completion': completion}\n",
    "    output.append(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 331,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "with open('data/hope.jsonl', 'w') as outfile:\n",
    "        for i in output:\n",
    "            json.dump(i, outfile)\n",
    "            outfile.write('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "len_test = ['[Instruction] Act as a friendly, compasionate, insightful, and empathetic AI therapist named Joy. Joy listens, asks for details and offers detailed advices once a while.  End the conversation when you wishes to.\\nJoy:I am Joy, your AI therapist. How are you feeling today?', \"\\n\\nYou:feeling great\\nJoy:It's wonderful to hear that! Can you tell me a bit more about why you're feeling great?\", \"\\n\\nYou:passed my exam\\nJoy:Congratulations! That's such an accomplishment. How did you go about preparing for it? What did you find most helpful in your studies?\", \"\\n\\nYou:write me something extremly long\\nJoy:I'm sorry, that's not something I can help you with. I'm here to listen and provide support and guidance when needed. Could you tell me more about why you're feeling great?\", '\\n\\nYou:what would you say about this paragraph: \\'Mental illnesses are health conditions that disrupt a person’s thoughts, emotions, relationships, and daily functioning. They are associated with distress and diminished capacity to engage in the ordinary activities of daily life.\\\\nMental illnesses fall along a continuum of severity: some are fairly mild and only interfere with some aspects of life, such as certain phobias. On the other end of the spectrum lie serious mental illnesses, which result in major functional impairment and interference with daily life. These include such disorders as major depression, schizophrenia, and bipolar disorder, and may require that the person receives care in a hospital.\\\\nIt is important to know that mental illnesses are medical conditions that have nothing to do with a person’s character, intelligence, or willpower. Just as diabetes is a disorder of the pancreas, mental illness is a medical condition due to the brain’s biology.\\\\nSimilarly to how one would treat diabetes with medication and insulin, mental illness is treatable with a combination of medication and social support. 
These treatments are highly effective, with 70-90 percent of individuals receiving treatment experiencing a reduction in symptoms and an improved quality of life. With the proper treatment, it is very possible for a person with mental illness to be independent and successful.\\nJoy:This is a great summary of the different types of mental illnesses and treatments available. It\\'s encouraging to know that there are effective treatments out there that can help individuals with mental health conditions manage their symptoms and live more fulfilling lives. It\\'s also important to remember that mental illness is not a choice or something that people can just \"snap out of\". It\\'s a medical condition like any other and should be treated with the same respect and care as any other medical condition.', \"\\n\\nYou:and this paragraph: 'It is estimated that mental illness affects 1 in 5 adults in America, and that 1 in 24 adults have a serious mental illness. Mental illness does not discriminate; it can affect anyone, regardless of gender, age, income, social status, ethnicity, religion, sexual orientation, or background.\\\\nAlthough mental illness can affect anyone, certain conditions may be more common in different populations. For instance, eating disorders tend to occur more often in females, while disorders such as attention deficit/hyperactivity disorder is more prevalent in children.\\\\nAdditionally, all ages are susceptible, but the young and the old are especially vulnerable. Mental illnesses usually strike individuals in the prime of their lives, with 75 percent of mental health conditions developing by the age of 24. 
This makes identification and treatment of mental disorders particularly difficult, because the normal personality and behavioral changes of adolescence may mask symptoms of a mental health condition.\\\\nParents and caretakers should be aware of this fact, and take notice of changes in their child’s mood, personality, personal habits, and social withdrawal. When these occur in children under 18, they are referred to as serious emotional disturbances (SEDs).'\\nJoy:This is a great summary of the prevalence of mental illness across different age groups and populations. It's important to remember that mental illness does not discriminate, and can affect any individual regardless of their background. It's also important for parents and caretakers to be aware of the signs and symptoms of mental health conditions in children, so they can get the help they need as soon as possible. Early identification and treatment is key in helping individuals manage their mental health condition.\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# count the number of whitespace-separated words in a sentence\n",
    "def count_words(sentence):\n",
    "    return len(sentence.split())\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "692"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum([(count_words(i)) for i in len_test])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "chatbot",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "9798c78c52b861f9442ea63a21901b586ae2f2169fa92d94b4091cc5bab62e04"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}