Whatsapp chat analyzer web app with Python and Symbl.ai in < 100 lines of code!

kmukabe

Kacha

Posted on March 24, 2022

Whatsapp chat analyzer web app with Python and Symbl.ai in < 100 lines of code!

Creating a Whatsapp Chat analyser web app with Symbl.ai and python in less than 100 lines of code

This application will take an .txt export of a Whatsapp chat. The application will then run conversational analysis on the .txt file. Symbl.ai will then extract information like follow ups, topics and action items.

The application requires the following libraries:

  • streamlit: This library converts python scripts into web applications.
  • symbl.ai: This is a conversational A.I library that can get insights from audio and text.

Starting the Streamlit app

Streamlit is a great library I found that can convert python scripts into a web application.

Lets create an application that will accept a whatsapp txt chat file and parse the chat data.

import streamlit as st


chat_file = st.file_uploader("Select chat file")

if chat_file:
    conversation_request = handle_uploaded_file(chat_file)
Enter fullscreen mode Exit fullscreen mode

To handle the file and extract the messages in a format that symbl will understand we need to create a function to do it.

def handle_uploaded_file(file):
    chat = file.read().decode("utf-8")

    # Split each line from the file into a list and strip all whitespace
    lines = chat.splitlines()
    lines = [line.strip() for line in lines]

    messages = []
    cleaned_lines = []

    # If line does not start with a Whatsapp date format [m/d/y, h:m:s am] then append it to the previous line.
    for x in range(len(lines)):
        if lines[x].find("[") == -1:
            cleaned_lines[-1] = f"{cleaned_lines[-1]} {lines[x]}"
            continue
        cleaned_lines.append(lines[x])

    # pares the time, name and message from a line
    for line in cleaned_lines:
        st_dt, en_dt = line.find("["), line.find("]")
        time = line[st_dt:en_dt].replace("[", "")
        name = line[en_dt:].split(":")[0].replace("]", "")
        st_m = line[en_dt:].find(":")
        message = line[en_dt:][st_m:].replace(":", "")

        # Create messages in the format Symbl requires
        messages.append(
            {
            "duration": {"startTime": arrow.get(time, 'M/D/YY, H:mm:ss A').format(), "endTime": arrow.get(time, 'M/D/YY, H:mm:ss A').format()},
            "payload": {"content": message, "contentType": "text/plain"},
            "from": {"name": name, "userId": name},
            }
        )

    return {
        "name": "Chat",
            "confidenceThreshold": 0.6,
            "detectPhrases": True,
            "messages": messages,
    }
Enter fullscreen mode Exit fullscreen mode

Once we have the messages from the handle request file we can analyse it with Symbl.ai

import symbl

...

conversation_object = symbl.Text.process(payload=conversation_request)
Enter fullscreen mode Exit fullscreen mode

Displaying the results

Add some display components using streamlit.

with st.expander("Topics"):
    with st.spinner("Getting topics"):
        topics = symbl.Conversations.get_topics(conversation_id=conversation_id)
    for topic in topics.topics:
        st.write(f"- {topic.text}")

with st.expander("Action Points"):
    with st.spinner("Getting action items"):
        action_items = symbl.Conversations.get_action_items(conversation_id=conversation_id)
    for item in action_items.action_items:
        st.write(f"- {item.text}")

with st.expander("Questions"):
    questions = symbl.Conversations.get_questions(conversation_id=conversation_id)
    for question in questions.questions:
        st.write(f"- {question.text}")

with st.expander("Follow ups"):
    follow_ups = symbl.Conversations.get_follow_ups(conversation_id=conversation_id)
    for follow_up in follow_ups.follow_ups:
        st.write(f"- {follow_up.text}")
Enter fullscreen mode Exit fullscreen mode

The final code file

import streamlit as st
import arrow
import symbl


def handle_uploaded_file(file):
    chat = file.read().decode("utf-8")
    lines = chat.splitlines()
    lines = [l.strip() for l in lines]
    messages = []
    cleaned_lines = []
    for x in range(len(lines)):
        if lines[x].find("[") == -1:
            cleaned_lines[-1] = f"{cleaned_lines[-1]} {lines[x]}"
            continue
    cleaned_lines.append(lines[x])
    for line in cleaned_lines:
        st_dt, en_dt = line.find("["), line.find("]")
        time = line[st_dt:en_dt].replace("[", "")
        name = line[en_dt:].split(":")[0].replace("]", "")
        st_m = line[en_dt:].find(":")
        message = line[en_dt:][st_m:].replace(":", "")
        messages.append(
            {
                "duration": {"startTime": arrow.get(time, 'M/D/YY, H:mm:ss A').format(), "endTime": arrow.get(time, 'M/D/YY, H:mm:ss A').format()},
                "payload": {"content": message, "contentType": "text/plain"},
                "from": {"name": name, "userId": name},
            }
        )
    return {
        "name": "Chat",
            "confidenceThreshold": 0.6,
            "detectPhrases": True,
            "messages": messages,
    }


chat_file = st.file_uploader("Select chat file")

if chat_file:
    conversation_request = handle_uploaded_file(chat_file)
    with st.spinner("Analysing Text"):
        conversation_object = symbl.Text.process(payload=conversation_request)
    conversation_id = conversation_object.get_conversation_id()
    st.write(conversation_id)

    with st.expander("Topics"):
        with st.spinner("Getting topics"):
            topics = symbl.Conversations.get_topics(conversation_id=conversation_id)
        for topic in topics.topics:
            st.write(f"- {topic.text}")

    with st.expander("Action Points"):
        with st.spinner("Getting action items"):
            action_items = symbl.Conversations.get_action_items(conversation_id=conversation_id)
        for item in action_items.action_items:
            st.write(f"- {item.text}")

    with st.expander("Questions"):
        questions = symbl.Conversations.get_questions(conversation_id=conversation_id)
        for question in questions.questions:
            st.write(f"- {question.text}")

    with st.expander("Follow ups"):
        follow_ups = symbl.Conversations.get_follow_ups(conversation_id=conversation_id)
        for follow_up in follow_ups.follow_ups:
            st.write(f"- {follow_up.text}")
Enter fullscreen mode Exit fullscreen mode

Screenshots of our final application

Upload chat file

Symbl.ai analysis

Symbl.ai analysis results

💖 💪 🙅 🚩
kmukabe
Kacha

Posted on March 24, 2022

Join Our Newsletter. No Spam, Only the good stuff.

Sign up to receive the latest update from our blog.

Related