#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@description: Sentiment analysis using lemmatized data
"""

import pandas as pd
from moda.dataprep import raw_to_ts, ts_to_range
import matplotlib.pyplot as plt
import numpy as np
import pickle

# Before this, run dna_clean.py

# Load the object pickled in the file 'df_cleaned' (relative to the current
# working directory). It is later passed to updateSentimentDataFrame, which
# reads its 'lem' column.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load 'df_cleaned' when it comes from a trusted source.
with open('df_cleaned', 'rb') as f:
    df = pickle.load(f)

#### SENTIMENT ANALYSIS

## Helper functions for counting sentiment words in each document

def word_count(text):
    """
    Return the number of items in ``text``.

    This is simply ``len(text)``: for a list of tokens it is the token
    count, for a plain string it is the character count.
    """
    return len(text)

def loadPositive():
    """
    Load the positive word list.

    Reads 'LoughranMcDonald_Positive.txt' from the current working
    directory, one entry per line.

    Returns:
        list of str: every line of the file, stripped of surrounding
        whitespace and lower-cased, in file order.

    Raises:
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    """
    # The context manager closes the file even if reading fails; the
    # previous version never closed the handle.
    with open('LoughranMcDonald_Positive.txt', "r") as myfile:
        return [line.strip().lower() for line in myfile]

def loadNegative():
    """
    Load the negative word list.

    Reads 'LoughranMcDonald_Negative.txt' from the current working
    directory, one entry per line.

    Returns:
        list of str: every line of the file, stripped of surrounding
        whitespace and lower-cased, in file order.

    Raises:
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    """
    # The context manager closes the file even if reading fails; the
    # previous version never closed the handle.
    with open('LoughranMcDonald_Negative.txt', "r") as myfile:
        return [line.strip().lower() for line in myfile]

def loadUncertainty():
    """
    Load the uncertainty word list.

    Reads 'LoughranMcDonald_Uncertainty.txt' from the current working
    directory, one entry per line.

    Returns:
        list of str: every line of the file, stripped of surrounding
        whitespace and lower-cased, in file order.

    Raises:
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    """
    # The context manager closes the file even if reading fails; the
    # previous version never closed the handle.
    with open('LoughranMcDonald_Uncertainty.txt', "r") as myfile:
        return [line.strip().lower() for line in myfile]

def countNeg(cleantext, negative):
    """
    Count how many items of ``cleantext`` appear in ``negative``.

    Every occurrence is counted, so a repeated word contributes once per
    repetition.
    """
    return sum(1 for token in cleantext if token in negative)

def countPos(cleantext, positive):
    """
    Count how many items of ``cleantext`` appear in ``positive``.

    Every occurrence is counted, so a repeated word contributes once per
    repetition.
    """
    hits = 0
    for token in cleantext:
        if token in positive:
            hits += 1
    return hits

def countUnc(cleantext, uncertainty):
    """
    Count how many items of ``cleantext`` appear in ``uncertainty``.

    Every occurrence is counted, so a repeated word contributes once per
    repetition.
    """
    matches = filter(lambda token: token in uncertainty, cleantext)
    return len(list(matches))

def Neg(cleantext, negative):
    """
    Return the items of ``cleantext`` that appear in ``negative``.

    The result is a list that keeps the original order and any
    duplicates.
    """
    found = []
    for token in cleantext:
        if token in negative:
            found.append(token)
    return found

def Pos(cleantext, positive):
    """
    Return the items of ``cleantext`` that appear in ``positive``.

    The result is a list that keeps the original order and any
    duplicates.
    """
    return list(filter(lambda token: token in positive, cleantext))

def Unc(cleantext, uncertainty):
    """
    Return the items of ``cleantext`` that appear in ``uncertainty``.

    The result is a list that keeps the original order and any
    duplicates.
    """
    kept = []
    for token in cleantext:
        if token not in uncertainty:
            continue
        kept.append(token)
    return kept

def getSentiment(cleantext, negative, positive):
    """
    Map the net sentiment of ``cleantext`` to a score category.

    The net sentiment is the number of items of ``cleantext`` found in
    ``positive`` minus the number found in ``negative`` (every occurrence
    counts).

    Args:
        cleantext: iterable of tokens to score.
        negative: container of negative words (anything supporting ``in``).
        positive: container of positive words (anything supporting ``in``).

    Returns:
        int: 4 if net > 3; 3 if 1 <= net <= 3; 0 if net == 0;
        2 if -3 < net < 0; 1 if net <= -3.
    """
    # Use the word lists supplied by the caller. The previous version
    # overwrote both arguments by re-reading the dictionary files from disk
    # on every call, which ignored the caller's lists and repeated the file
    # I/O for every scored document.
    pos_hits = sum(1 for token in cleantext if token in positive)
    neg_hits = sum(1 for token in cleantext if token in negative)
    net = pos_hits - neg_hits

    if net > 3:
        return 4
    if net > 0:
        return 3
    if net == 0:
        return 0
    if net > -3:
        return 2
    return 1
    
### Restricted uncertainty vocabulary: only these four words are counted
### for the 'countunc_lite' column.

uncwords = [
    'risk',
    'risky',
    'uncertain',
    'uncertainty',
]

def countunc_Lite(cleantext, uncwords):
    """
    Count how many items of ``cleantext`` appear in ``uncwords``.

    Every occurrence is counted, so a repeated word contributes once per
    repetition.
    """
    return sum(1 for token in cleantext if token in uncwords)

    
def updateSentimentDataFrame(df):
    """
    Add sentiment columns to ``df`` based on its 'lem' column.

    Each value of ``df['lem']`` is treated as an iterable of tokens
    (assumed to be hashable, e.g. strings). The following columns are
    written, in this order:

        wordcount      len() of the 'lem' value
        score          category returned by getSentiment
        countpos       number of tokens in the positive list
        countneg       number of tokens in the negative list
        countunc       number of tokens in the uncertainty list
        countunc_lite  number of tokens in the module-level ``uncwords``
        pos, neg, unc  the matching tokens themselves, as lists

    Args:
        df: pandas DataFrame with a 'lem' column.

    Returns:
        The same DataFrame object, modified in place.
    """
    # Load each dictionary once and convert it to a frozenset: the helpers
    # only test membership with ``in``, and a set lookup is constant time
    # where a list lookup scans the whole word list for every token.
    positive = frozenset(loadPositive())
    negative = frozenset(loadNegative())
    uncertainty = frozenset(loadUncertainty())

    lemmas = df['lem']

    df['wordcount'] = lemmas.apply(word_count)
    df['score'] = lemmas.apply(lambda x: getSentiment(x, negative, positive))
    df['countpos'] = lemmas.apply(lambda x: countPos(x, positive))
    df['countneg'] = lemmas.apply(lambda x: countNeg(x, negative))
    df['countunc'] = lemmas.apply(lambda x: countUnc(x, uncertainty))
    df['countunc_lite'] = lemmas.apply(lambda x: countunc_Lite(x, uncwords))
    df['pos'] = lemmas.apply(lambda x: Pos(x, positive))
    df['neg'] = lemmas.apply(lambda x: Neg(x, negative))
    df['unc'] = lemmas.apply(lambda x: Unc(x, uncertainty))

    return df

print('Performing Sentiment...')
# updateSentimentDataFrame assigns its new columns on the DataFrame it is
# given and returns that same object, so df_cutrem and df refer to the same
# DataFrame.
df_cutrem = updateSentimentDataFrame(df)

#### save files 

# Write the scored DataFrame as a pickle to the file 'df_sent' in the current
# working directory.
with open('df_sent', 'wb') as f:
    pickle.dump(df_cutrem, f)
