#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@description: Collapse to daily, weekly and monthly time series of NSI and NUI
"""
import pandas as pd
import numpy as np
import pickle
import datetime as dt
# Load the input DataFrame that every section below aggregates.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a "df_cat" produced by a trusted step of this pipeline.
with open("df_cat", "rb") as pickle_file:  # ensures the handle is closed
    raw = pickle.load(pickle_file)


### Daily time series
# Keep only the count columns plus the grouping key, then total them per date.
# (Despite its name, raw_day_mean holds per-date sums, not means.)
daily_columns = ['countpos', 'countneg', 'countunc', 'wordcount', 'date', 'unc_doc', 'countunc_lite']
raw_day = raw[daily_columns]
raw_day_mean = raw_day.groupby(['date'], as_index=False).sum()

# Number of input rows per date, aligned to the aggregated frame via the key.
rows_per_day = raw_day['date'].value_counts()
raw_day_mean['count'] = raw_day_mean['date'].map(rows_per_day)

# nsi: (positive - negative) counts per word.
# nui: summed unc_doc divided by the number of rows, times 100.
net_tone_day = raw_day_mean['countpos'] - raw_day_mean['countneg']
raw_day_mean['nsi'] = net_tone_day / raw_day_mean['wordcount']
raw_day_mean['nui'] = raw_day_mean['unc_doc'] / raw_day_mean['count'] * 100

raw_day_mean.to_csv('raw_day_mean.csv')

### Weekly time series
# Total the count columns per value of the 'week' key.
# (Despite its name, raw_wy_mean holds per-week sums, not means.)
raw_wy = raw[['countpos', 'countneg', 'countunc', 'wordcount', 'week', 'unc_doc', 'countunc_lite']]
raw_wy_mean = raw_wy.groupby(['week'], as_index=False).sum()

# Number of input rows per week, aligned to the aggregated frame via the key.
raw_wy_mean['count'] = raw_wy_mean['week'].map(raw_wy['week'].value_counts())

# nsi: (positive - negative) counts per word.
# nui: summed unc_doc divided by the number of rows, times 100.
raw_wy_mean['nsi'] = (raw_wy_mean['countpos'] - raw_wy_mean['countneg']) / raw_wy_mean['wordcount']
raw_wy_mean['nui'] = raw_wy_mean['unc_doc'] / raw_wy_mean['count'] * 100

# %V is the ISO 8601 week number, so it must be paired with the ISO year (%G),
# not the calendar year (%Y); otherwise weeks that straddle New Year get the
# wrong year in their label (e.g. 2021-01-01 would read "2021-w53").
# The .dt accessor requires 'week' to be datetime-like.
raw_wy_mean['weekyear'] = raw_wy_mean['week'].dt.strftime('%G-w%V')
raw_wy_mean.to_csv('raw_wy_mean.csv')

### Monthly time series
# Total the count columns per value of the 'month_year' key.
# (Despite its name, raw_my_mean holds per-month sums, not means.)
raw_my = raw[['countpos', 'countneg', 'countunc', 'wordcount', 'month_year', 'unc_doc', 'countunc_lite']]
raw_my_mean = raw_my.groupby(['month_year'], as_index=False).sum()

# Number of input rows per month, aligned to the aggregated frame via the key.
raw_my_mean['count'] = raw_my_mean['month_year'].map(raw_my['month_year'].value_counts())

# nsi:  (positive - negative) counts per word.
# rnsi: (positive - negative) relative to (positive + negative).
# nui:  summed unc_doc divided by the number of rows, times 100.
# The nsi and rnsi assignments were fused onto one line (a SyntaxError); they
# are two separate statements.
raw_my_mean['nsi'] = (raw_my_mean['countpos'] - raw_my_mean['countneg']) / raw_my_mean['wordcount']
raw_my_mean['rnsi'] = (raw_my_mean['countpos'] - raw_my_mean['countneg']) / (raw_my_mean['countpos'] + raw_my_mean['countneg'])
raw_my_mean['nui'] = raw_my_mean['unc_doc'] / raw_my_mean['count'] * 100
raw_my_mean.to_csv('raw_my_mean.csv')

# Explicit copy: a column is added below, and assigning into a slice of
# raw_my_mean would trigger pandas' SettingWithCopyWarning.
keep = raw_my_mean[['month_year', 'nsi', 'nui']].copy()


def bom(string):
    """Format a value as the first day of its month ("YYYY-MM-01").

    Despite the parameter name, the argument must provide ``strftime``.
    """
    return string.strftime("%Y-%m-01")


keep['date'] = keep['month_year'].apply(bom)
keep.to_csv('nsi_nui_data.csv')


### Monetary Policy Daily time series
# NOTE(review): raw_mpolicy is not defined anywhere in this file; it must
# already exist (presumably a monetary-policy subset of raw - confirm) or this
# section raises NameError.
mpolicy_columns = ['countpos', 'countneg', 'countunc', 'wordcount', 'date', 'unc_doc', 'countunc_lite']
raw_mpolicy_day = raw_mpolicy[mpolicy_columns]

# Per-date totals of the count columns (sums, despite the "_mean" suffix).
raw_mpolicy_day_mean = raw_mpolicy_day.groupby(['date'], as_index=False).sum()

# Number of input rows per date, aligned to the aggregated frame via the key.
mpolicy_rows_per_day = raw_mpolicy_day['date'].value_counts()
raw_mpolicy_day_mean['count'] = raw_mpolicy_day_mean['date'].map(mpolicy_rows_per_day)

# mpnsi: (positive - negative) counts per word.
# mpnui: summed unc_doc divided by the number of rows, times 100.
mpolicy_net_tone = raw_mpolicy_day_mean['countpos'] - raw_mpolicy_day_mean['countneg']
raw_mpolicy_day_mean['mpnsi'] = mpolicy_net_tone / raw_mpolicy_day_mean['wordcount']
raw_mpolicy_day_mean['mpnui'] = raw_mpolicy_day_mean['unc_doc'] / raw_mpolicy_day_mean['count'] * 100

raw_mpolicy_day_mean.to_csv('raw_mpolicy_day_mean.csv')


### Housing monthly time series

# NOTE(review): raw_real is not defined anywhere in this file; it must already
# exist (presumably a housing/real-estate subset of raw - confirm) or this
# section raises NameError.
raw_real_my = raw_real[['countpos', 'countneg', 'countunc', 'wordcount', 'month_year', 'unc_doc', 'countunc_lite']]

# Per-month totals of the count columns.
raw_real_my_sum = raw_real_my.groupby(['month_year'], as_index=False).sum()

# Number of input rows per month, aligned to the aggregated frame via the key.
raw_real_my_sum['count'] = raw_real_my_sum['month_year'].map(raw_real_my['month_year'].value_counts())

# hnsi: (positive - negative) counts per word.
# rhsi: (positive - negative) relative to (positive + negative).
# hnui: summed unc_doc divided by the number of rows, times 100.
# The hnsi and rhsi assignments were fused onto one line (a SyntaxError); they
# are two separate statements.
raw_real_my_sum['hnsi'] = (raw_real_my_sum['countpos'] - raw_real_my_sum['countneg']) / raw_real_my_sum['wordcount']
raw_real_my_sum['rhsi'] = (raw_real_my_sum['countpos'] - raw_real_my_sum['countneg']) / (raw_real_my_sum['countpos'] + raw_real_my_sum['countneg'])
raw_real_my_sum['hnui'] = raw_real_my_sum['unc_doc'] / raw_real_my_sum['count'] * 100

# Explicit copy: a column is added below, and assigning into a slice of
# raw_real_my_sum would trigger pandas' SettingWithCopyWarning.
keep = raw_real_my_sum[['month_year', 'hnsi', 'hnui']].copy()


def bom(string):
    """Format a value as the first day of its month ("YYYY-MM-01").

    Despite the parameter name, the argument must provide ``strftime``.
    """
    return string.strftime("%Y-%m-01")


keep['date'] = keep['month_year'].apply(bom)

# Only hnsi.csv is written here. The earlier extra write to
# 'nsi_nui_data.csv' overwrote the overall monthly NSI/NUI export with
# housing data, so it has been removed.
keep.to_csv('hnsi.csv')


# Sydney housing monthly series: rows of raw_real flagged in the 'sydney' column.
# raw_real must already be defined before this section runs.
# The explicit "== True" comparison is kept because the column's dtype is not
# visible here - TODO confirm it is boolean before simplifying.
real_sydney = raw_real[raw_real['sydney'] == True]
real_sydney_my = real_sydney[['countpos', 'countneg', 'countunc', 'wordcount', 'month_year', 'unc_doc', 'countunc_lite']]

# Per-month totals of the count columns.
real_sydney_my_sum = real_sydney_my.groupby(['month_year'], as_index=False).sum()

# Number of input rows per month, aligned to the aggregated frame via the key.
real_sydney_my_sum['count'] = real_sydney_my_sum['month_year'].map(real_sydney_my['month_year'].value_counts())

# sydney_hnsi: (positive - negative) counts per word.
# sydney_hnui: summed unc_doc divided by the number of rows, times 100.
real_sydney_my_sum['sydney_hnsi'] = (real_sydney_my_sum['countpos'] - real_sydney_my_sum['countneg']) / real_sydney_my_sum['wordcount']
real_sydney_my_sum['sydney_hnui'] = real_sydney_my_sum['unc_doc'] / real_sydney_my_sum['count'] * 100

# Explicit copy: a column is added below, and assigning into a slice of
# real_sydney_my_sum would trigger pandas' SettingWithCopyWarning.
keep = real_sydney_my_sum[['month_year', 'sydney_hnsi', 'sydney_hnui']].copy()


def bom(string):
    """Format a value as the first day of its month ("YYYY-MM-01").

    Despite the parameter name, the argument must provide ``strftime``.
    """
    return string.strftime("%Y-%m-01")


keep['date'] = keep['month_year'].apply(bom)
keep.to_csv('sydney_hnsi.csv')


# Melbourne housing monthly series: rows of raw_real flagged in the 'melb' column.
# raw_real must already be defined before this section runs.
# The explicit "== True" comparison is kept because the column's dtype is not
# visible here - TODO confirm it is boolean before simplifying.
real_melbourne = raw_real[raw_real['melb'] == True]
real_melbourne_my = real_melbourne[['countpos', 'countneg', 'countunc', 'wordcount', 'month_year', 'unc_doc', 'countunc_lite']]

# Per-month totals of the count columns.
real_melbourne_my_sum = real_melbourne_my.groupby(['month_year'], as_index=False).sum()

# Number of input rows per month, aligned to the aggregated frame via the key.
real_melbourne_my_sum['count'] = real_melbourne_my_sum['month_year'].map(real_melbourne_my['month_year'].value_counts())

# melb_hnsi: (positive - negative) counts per word.
# melb_hnui: summed unc_doc divided by the number of rows, times 100.
real_melbourne_my_sum['melb_hnsi'] = (real_melbourne_my_sum['countpos'] - real_melbourne_my_sum['countneg']) / real_melbourne_my_sum['wordcount']
real_melbourne_my_sum['melb_hnui'] = real_melbourne_my_sum['unc_doc'] / real_melbourne_my_sum['count'] * 100
real_melbourne_my_sum.to_csv('real_melbourne_my_sum.csv')

# Explicit copy: a column is added below, and assigning into a slice of
# real_melbourne_my_sum would trigger pandas' SettingWithCopyWarning.
keep = real_melbourne_my_sum[['month_year', 'melb_hnsi', 'melb_hnui']].copy()


def bom(string):
    """Format a value as the first day of its month ("YYYY-MM-01").

    Despite the parameter name, the argument must provide ``strftime``.
    """
    return string.strftime("%Y-%m-01")


keep['date'] = keep['month_year'].apply(bom)
keep.to_csv('melb_hnsi.csv')

# Merge housing series

# Combine the overall, Sydney and Melbourne hnsi columns side by side.
# merge() defaults to an inner join, so only month_year values present in all
# three frames are kept.
overall_hnsi = raw_real_my_sum[['month_year', 'hnsi']]
sydney_hnsi = real_sydney_my_sum[['month_year', 'sydney_hnsi']]
melbourne_hnsi = real_melbourne_my_sum[['month_year', 'melb_hnsi']]

real_all = overall_hnsi.merge(sydney_hnsi, on='month_year')
real_all = real_all.merge(melbourne_hnsi, on='month_year')
real_all.to_csv('real_all.csv')

