import getpass
import os

import requests

# Connection settings are read from the environment so that secrets are not
# committed with the notebook. URL, database and user fall back to the values
# this notebook was originally written against.
couchdb_url = os.environ.get("COUCHDB_URL", "http://172.26.136.104:5987")
database_name = os.environ.get("COUCHDB_DATABASE", "photon")
username = os.environ.get("COUCHDB_USER", "admin")
# Deliberately no default for the password: prompt when COUCHDB_PASSWORD is unset.
password = os.environ.get("COUCHDB_PASSWORD") or getpass.getpass("CouchDB password: ")

# requests never times out on its own; bound the wait so an unreachable host
# fails fast instead of hanging the kernel.
REQUEST_TIMEOUT_S = 10

# GET /{db} returns the database-info document when the database exists and
# the credentials are accepted.
response = requests.get(
    f"{couchdb_url}/{database_name}",
    auth=(username, password),
    timeout=REQUEST_TIMEOUT_S,
)

# Report the outcome; any non-200 status is shown so it can be diagnosed.
if response.status_code == 200:
    print("Successfully connected to CouchDB")
else:
    print("Failed to connect to CouchDB:", response.status_code)

# Show only the first 500 characters of the body (database info or error payload).
print(response.text[:500])
"9389265b-6c95-423a-aa9e-d0e89bb66a91", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package vader_lexicon to\n", + "[nltk_data] /home/jovyan/nltk_data...\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "import couchdb\n", + "import pandas as pd\n", + "import numpy as np\n", + "import re\n", + "import nltk\n", + "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", + "import emoji\n", + "from textblob import TextBlob\n", + "from fuzzywuzzy import process\n", + "import matplotlib.pyplot as plt\n", + "from wordcloud import WordCloud\n", + "\n", + "\n", + "nltk.download('vader_lexicon')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5b0e64ab-1d17-42fa-b94d-a47f0073be17", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting couchdb\n", + " Downloading CouchDB-1.2-py2.py3-none-any.whl (67 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m68.0/68.0 kB\u001b[0m \u001b[31m199.9 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hInstalling collected packages: couchdb\n", + "Successfully installed couchdb-1.2\n" + ] + } + ], + "source": [ + "!pip install couchdb" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "32ffa972-8225-45b7-9d08-90c223b029c9", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting cloudant\n", + " Downloading cloudant-2.15.0-py3-none-any.whl (80 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.5/80.5 kB\u001b[0m \u001b[31m220.9 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + 
"\u001b[?25hRequirement already satisfied: requests<3.0.0,>=2.7.0 in /opt/conda/lib/python3.10/site-packages (from cloudant) (2.29.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.7.0->cloudant) (3.1.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.7.0->cloudant) (3.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.7.0->cloudant) (1.26.15)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0,>=2.7.0->cloudant) (2023.5.7)\n", + "Installing collected packages: cloudant\n", + "Successfully installed cloudant-2.15.0\n" + ] + } + ], + "source": [ + "!pip install cloudant" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "5f685769-275e-4346-8180-9e16dc4ccbe9", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " location text\n", + "0 Melbourne, Victoria AshramGzb tdybsayudbysauaduyadbuadbyau\n", + "1 Melbourne, Victoria 1233123123123133123123123\n", + "2 Melbourne, Victoria @AshramGzb @narendramodi बिल्कुल सही कहा\n", + "3 Melbourne, Victoria @PM_Jordan DL Down3r´s hit inspirational song ...\n", + "4 Melbourne, Victoria @alcoholicsuide DL Down3r´s hit inspirational ...\n", + ".. ... 
import getpass
import os

import pandas as pd
from cloudant.client import CouchDB

# Connection settings are read from the environment so that secrets are not
# committed with the notebook. URL and user fall back to the values this
# notebook was originally written against; the password has no default and is
# prompted for when COUCHDB_PASSWORD is unset.
COUCHDB_URL = os.environ.get("COUCHDB_URL", "http://172.26.136.104:5987")
COUCHDB_USER = os.environ.get("COUCHDB_USER", "admin")
COUCHDB_PASSWORD = os.environ.get("COUCHDB_PASSWORD") or getpass.getpass("CouchDB password: ")

# Connect to CouchDB and read the 'twitter' database.
client = CouchDB(COUCHDB_USER, COUCHDB_PASSWORD, url=COUCHDB_URL, connect=True)
try:
    db = client['twitter']
    # Iterating the database yields complete documents (fetched in pages), so
    # one pass is enough; there is no need to look each document up again by id.
    documents = list(db)
finally:
    # Always release the server session, even if the read fails part-way.
    client.disconnect()

# Ids of every document seen, including special ones (kept for debugging).
doc_ids = [doc['_id'] for doc in documents]

# Keep only regular documents (ids starting with '_' are special, e.g. design
# documents) that carry both a non-empty 'location' and a non-empty 'text'.
data = [
    {'location': doc.get('location'), 'text': doc.get('text')}
    for doc in documents
    if not doc['_id'].startswith('_')
    and doc.get('location')
    and doc.get('text')
]

# Explicit columns keep the frame's schema stable even when nothing matched.
df = pd.DataFrame(data, columns=['location', 'text'])

# Summarise instead of dumping the whole frame: the rows are user-written text.
print(f"Loaded {len(df)} of {len(doc_ids)} documents with both 'location' and 'text'")
style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>location</th>\n", + " <th>text</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [location, text]\n", + "Index: []" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79d78a74-449b-4f96-9c64-d8a15ad0132e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4d3cfff-4189-43df-bb60-0b1932aaa0e4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}