ggplot(hdi) +
geom_segment( aes(x=reorder(state, hdi[,7]), xend=state, y=hdi[,7], yend=hdi[,6]), color="grey") +
geom_point( aes(x=reorder(state, hdi[,7]), y=hdi[,7]), color="darkred", size=3 ) +
geom_point( aes(x=reorder(state, hdi[,7]), y=hdi[,6]), color="darkblue", size=3 ) +
ggplot2::annotate("text", x = 30, y = 115,
label = "2023",
color = "darkred", size = 5, hjust = -0.1, vjust = .75) +
ggplot2::annotate("text", x = 42, y = 100, label = "2022",
color = "darkblue", size = 5, hjust = -0.1, vjust = -.1) +
coord_flip()+
theme_minimal() +
theme(legend.position = "none") +
xlab("") +
ylab("Economic Performance Index, 2019 == 100") +
labs(caption = "Source: CT Dept of Labor, CT Dept Economic & Community Development ") +
ggtitle("Economic Performance Index, 2022:2023") +
theme(
legend.position = "none",
#text = element_text(family = "Georgia"),
axis.text.y = element_text(size = 10),
plot.title = element_text(size = 16,
margin = ggplot2::margin(b = 10),
hjust = 0),
plot.subtitle = element_text(size = 12, color = "black",
margin = ggplot2::margin(b = 25, l = -25)
),
plot.caption = element_text(size = 8,
margin = ggplot2::margin(t = 10),
color = "grey70", hjust = 0))
hdi$`change22-23` =as.numeric(hdi$`change22-23`)
g1 = ggplot(hdi, aes(x = hdi[,2], y = reorder(state, hdi[,2]), color = state)) +
geom_segment(aes(x = 0, y = state, xend = hdi[,2], yend = state), color = "grey") +
geom_point() +
theme(legend.position = "NULL")+
ggplot2::annotate("text", x = 0.05, y = 20, label = "Positive Growth", color = "#00BFC4",
size = 5, hjust = -0.1, vjust = .75) +
ggplot2::annotate("text", x = 0.05, y = 5, label = "Negative Growth", color = "#F8766D",
size = 5, hjust = -0.1, vjust = -.1) +
geom_vline(xintercept = 0, col = "darkred", linewidth =0.25) +
geom_segment(aes(x = 0.035, xend = 0.035, y = 15, yend = 30),
arrow = arrow(length = unit(0.2,"cm")),
linewidth = 1.2,
color = "#00BFC4") +
geom_segment(aes(x = 0.035, xend = 0.035 , y = 10, yend = 2),
arrow = arrow(length = unit(0.2,"cm")),
linewidth = 1.2,
color = "#F8766D") +
ggtitle("State Economic Performance:
Year to Year Change 2022-2023") +
xlab("Percent Change") +
ylab("") +
labs(caption = "Source: CT Dept of Labor, CT Dept Economic & Community Development ")+
theme_minimal() +
scale_x_continuous(labels = scales::percent_format()) +
theme(
#axis.title = element_blank(),
# panel.grid.minor = element_blank(),
legend.position = "none",
#text = element_text(family = "Georgia"),
axis.text.y = element_text(size = 8),
plot.title = element_text(size = 15,
margin = ggplot2::margin(b = 10),
hjust = 0),
plot.subtitle = element_text(size = 12, color = "darkslategrey",
margin = ggplot2::margin(b = 25, l = -25)),
plot.caption = element_text(size = 8,
margin = ggplot2::margin(t = 10),
color = "grey70", hjust = 0))
suppressWarnings(print(g1))
library(readxl)
my_custom_name_repair <- function(nms) tolower(gsub(" ", "_", nms))
#my_custom_name_repair <- function(nms) tolower(nms)
myremovex = function(nms) gsub("x", "", nms)
roster_raw <- read_excel("SEI2010-2023_AROD.xlsx",
sheet = "SEI",
range = "A2:O53",
#.name_repair = myremovex
.name_repair = make_clean_names
#.name_repair = my_custom_name_repair
)
myfixstate = function(nms) gsub(" ", "", nms)
roster_raw$state = myfixstate(roster_raw$state)
hdi = roster_raw |> dplyr::select(state, "x2022", "x2023")
colnames(hdi) = c("state", "2022", "2023")
hdi$`change22-23` = (hdi$`2023` - hdi$`2022`)/hdi$`2022`
hdi$`change22-23` = as.numeric(hdi$`change22-23`)
hdi = as.data.frame(hdi)
## Establishment =====
establish_raw <- read_excel("SEI2010-2023_AROD.xlsx",
sheet = "establishment",
range = "AS2:AS53",
.name_repair = make_clean_names
)
employment_raw <- read_excel("SEI2010-2023_AROD.xlsx",
sheet = "employment",
range = "AS2:AS53",
.name_repair = make_clean_names
)
realwage_raw <- read_excel("SEI2010-2023_AROD.xlsx",
sheet = "real wage",
range = "AS2:AS53",
.name_repair = make_clean_names
)
unemp_raw <- read_excel("SEI2010-2023_AROD.xlsx",
sheet = "unemployment rate",
range = "BH2:BH53",
.name_repair = make_clean_names
)
hdi = cbind.data.frame(hdi, establish_raw, employment_raw, realwage_raw, unemp_raw)
colnames(hdi)[5:8] = c("establish", "employment", "realwages", "unemployment")
myhdi = hdi |> dplyr::select(-c("2022", "change22-23"))
colnames(myhdi)[2] = "hdi"
last run in the editor is displayed.
g3 = ggplot(hdi) +
geom_segment( aes(x=reorder(state, hdi[,3]), xend=state, y=hdi[,3], yend=hdi[,2]), color="grey") +
#geom_point( aes(x=reorder(state, hdi[,7]), y=hdi[,7]), color=rgb(0.5,0.7,0.1,0.5), size=3 ) +
geom_point( aes(x=reorder(state, hdi[,3]), y=hdi[,3]), color="darkred", size=3 ) +
geom_point( aes(x=reorder(state, hdi[,3]), y=hdi[,2]), color="darkblue", size=3 ) +
ggplot2::annotate("text", x = 30, y = 115,
label = "2023",
color = "darkred", size = 5, hjust = -0.1, vjust = .75) +
ggplot2::annotate("text", x = 42, y = 100, label = "2022",
color = "darkblue", size = 5, hjust = -0.1, vjust = -.1) +
coord_flip()+
theme_minimal() +
#theme(legend.position = "none") +
xlab("") +
ylab("Economic Performance Index, 2019 == 100") +
ggtitle("Economic Performance Index, 2022:2023") +
theme(
legend.position = "none",
#text = element_text(family = "Georgia"),
axis.text.y = element_text(size = 10),
plot.title = element_text(size = 16,
#margin = margin(b = 10),
hjust = 0),
plot.subtitle = element_text(size = 12, color = "black"
#margin = margin(b = 25, l = -25)
),
plot.caption = element_text(size = 8,
#margin = margin(t = 10),
color = "grey70", hjust = 0)
)
suppressWarnings(print(g3))
# RF
library(Boruta)
myhdi = myhdi |> column_to_rownames("state")
head(myhdi)
myboruta = Boruta(hdi~., myhdi )
plot(myboruta)
#cbind(rank(myhdi$hdi), rank(myhdi$unemployment))
#matplot(cbind(sort(rank(myhdi$hdi)), sort(rank(myhdi$unemployment))), type = "l")
plot(rank(myhdi$hdi), rank(myhdi$unemployment),
xlab="Rank of State Performance Index",
ylab="Rank of State Unemployment Performance"
, type="n"
)
text(rank(myhdi$hdi), rank(myhdi$unemployment), rownames(myhdi))
abline(lm(rank(myhdi$hdi) ~rank(myhdi$unemployment)))
myhdi_scale = scale(myhdi) |> as.data.frame()
mymod = lm(hdi ~ unemployment +
establish +
employment +
realwages,
#data = myhdi_scale
data = myhdi)
summary(mymod)
Call:
lm(formula = hdi ~ unemployment + establish + employment + realwages,
data = myhdi)
Residuals:
Min 1Q Median
-0.00000000000015049 -0.00000000000000344 0.00000000000000402
3Q Max
0.00000000000000871 0.00000000000002395
Coefficients:
Estimate Std. Error t value
(Intercept) 0.000000000000055718 0.000000000000155207 0.36
unemployment 0.999999999999999001 0.000000000000000705 1418315641389190.50
establish 0.999999999999998890 0.000000000000001699 588436433807179.25
employment 0.999999999999998224 0.000000000000005238 190921133365290.50
realwages 1.000000000000000888 0.000000000000007302 136943094457266.08
Pr(>|t|)
(Intercept) 0.72
unemployment <0.0000000000000002 ***
establish <0.0000000000000002 ***
employment <0.0000000000000002 ***
realwages <0.0000000000000002 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.0000000000000239 on 46 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 6.05e+29 on 4 and 46 DF, p-value: <0.0000000000000002
#reghelper::beta(mymod)
barplot(mymod$coefficients[2:5], names.arg = colnames(myhdi[,2:5]))
cor(myhdi)
hdi establish employment realwages unemployment
hdi 1.000 0.375 0.316 0.266 0.804
establish 0.375 1.000 0.509 0.347 -0.207
employment 0.316 0.509 1.000 0.598 -0.154
realwages 0.266 0.347 0.598 1.000 -0.106
unemployment 0.804 -0.207 -0.154 -0.106 1.000
mymod = lm(rank(myhdi$hdi) ~rank(myhdi$unemployment) +
rank(myhdi$establish) +
rank(myhdi$employment) +
rank(myhdi$realwages))
summary(mymod)
Call:
lm(formula = rank(myhdi$hdi) ~ rank(myhdi$unemployment) + rank(myhdi$establish) +
rank(myhdi$employment) + rank(myhdi$realwages))
Residuals:
Min 1Q Median 3Q Max
-10.405 -2.350 0.009 1.864 15.538
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -14.8792 2.1174 -7.03 0.000000008296 ***
rank(myhdi$unemployment) 0.9018 0.0442 20.38 < 0.0000000000000002 ***
rank(myhdi$establish) 0.4091 0.0485 8.44 0.000000000067 ***
rank(myhdi$employment) 0.1177 0.0576 2.04 0.047 *
rank(myhdi$realwages) 0.1437 0.0545 2.63 0.011 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.55 on 46 degrees of freedom
Multiple R-squared: 0.914, Adjusted R-squared: 0.906
F-statistic: 122 on 4 and 46 DF, p-value: <0.0000000000000002
#reghelper::beta(mymod)
barplot(mymod$coefficients[2:5], names.arg = colnames(myhdi[,2:5]))
library(randomForest)
rand_frst <- randomForest(hdi~
establish +
employment +
realwages +
#realgdp +
unemployment,
data=myhdi,
ntree=100,
keep.forest=TRUE,
importance=TRUE)
iscores = randomForest::varImpPlot(rand_frst,
sort=FALSE,
#conditional = TRUE,
scale = FALSE,
main="Variable Importance Plot")
barplot(iscores[,1])
library(e1071)
svmfit = randomForest(x= myhdi[,2:5],y= myhdi[,1], kernel = "linear", scale = TRUE)
var_imp <- caret::varImp(svmfit, scale = FALSE)
var_imp
#sort the score in decreasing order
var_imp_df <- data.frame(cbind(variable = rownames(var_imp), score = var_imp[,1]))
var_imp_df$score <- as.double(var_imp_df$score)
var_imp_df[order(var_imp_df$score,decreasing = TRUE),]
ggplot(var_imp_df, aes(x=reorder(variable, score), y=score)) +
geom_point() +
geom_segment(aes(x=variable,xend=variable,y=0,yend=score)) +
ylab("IncNodePurity") +
xlab("Variable Name") +
coord_flip()
library(rminer)
M <- fit(hdi~., data=myhdi,
model="svm",
#model = "lm",
kpar=list(sigma=0.10), C=2)
svm.imp <- rminer::Importance(M, data=myhdi)
#svm.imp$imp
barplot(svm.imp$imp[2:5], names.arg = names(myhdi)[2:5])