Ludvig
committed on
Commit
·
889ab6a
1
Parent(s):
3d03352
Text and formatting
Browse files
- app.py +20 -9
- cvms_version.R +1 -1
- generate_data.R +24 -17
- plot.R +189 -126
- text_sections.py +38 -12
app.py
CHANGED
|
@@ -33,6 +33,14 @@ from text_sections import (
|
|
| 33 |
enter_count_data_text,
|
| 34 |
)
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
# Create temporary directory
|
| 38 |
|
|
@@ -103,7 +111,9 @@ if input_choice == "Upload predictions":
|
|
| 103 |
st.session_state["step"] = 1
|
| 104 |
else:
|
| 105 |
st.session_state["step"] = 0
|
| 106 |
-
st.markdown(
|
|
|
|
|
|
|
| 107 |
|
| 108 |
if st.session_state["step"] >= 1:
|
| 109 |
# Read and store (tmp) data
|
|
@@ -122,8 +132,8 @@ if input_choice == "Upload predictions":
|
|
| 122 |
elif input_choice == "Upload counts":
|
| 123 |
with st.form(key="data_form"):
|
| 124 |
upload_counts_text()
|
| 125 |
-
data_path = st.file_uploader("Upload
|
| 126 |
-
if st.form_submit_button(label="Use
|
| 127 |
if data_path:
|
| 128 |
st.session_state["step"] = 1
|
| 129 |
else:
|
|
@@ -139,9 +149,7 @@ elif input_choice == "Upload counts":
|
|
| 139 |
prediction_col = st.selectbox(
|
| 140 |
"Predictions column", options=list(df.columns)
|
| 141 |
)
|
| 142 |
-
n_col = st.selectbox(
|
| 143 |
-
"Counts column", options=list(df.columns)
|
| 144 |
-
)
|
| 145 |
|
| 146 |
if st.form_submit_button(label="Set columns"):
|
| 147 |
st.session_state["step"] = 2
|
|
@@ -286,7 +294,7 @@ if st.session_state["step"] >= 2:
|
|
| 286 |
col1, col2, col3 = st.columns([2, 2, 2])
|
| 287 |
with col2:
|
| 288 |
st.write(df.head(5))
|
| 289 |
-
st.write(f"{df.shape} (first 5 rows)
|
| 290 |
|
| 291 |
else:
|
| 292 |
st.session_state["entered_counts"].to_csv(data_store_path)
|
|
@@ -314,7 +322,10 @@ if st.session_state["step"] >= 2:
|
|
| 314 |
"of another class is excluded.",
|
| 315 |
)
|
| 316 |
with col2:
|
| 317 |
-
if
|
|
|
|
|
|
|
|
|
|
| 318 |
prob_of_class = st.selectbox(
|
| 319 |
"Probabilities are of (not working)",
|
| 320 |
options=st.session_state["classes"],
|
|
@@ -401,7 +412,7 @@ if st.session_state["step"] >= 2:
|
|
| 401 |
with col3:
|
| 402 |
dpi = st.number_input("DPI (not working)", value=320)
|
| 403 |
|
| 404 |
-
if st.form_submit_button(label="
|
| 405 |
st.session_state["step"] = 3
|
| 406 |
|
| 407 |
if st.session_state["step"] >= 3:
|
|
|
|
| 33 |
enter_count_data_text,
|
| 34 |
)
|
| 35 |
|
| 36 |
+
st.markdown("""
|
| 37 |
+
<style>
|
| 38 |
+
.small-font {
|
| 39 |
+
font-size:0.85em !important;
|
| 40 |
+
}
|
| 41 |
+
</style>
|
| 42 |
+
""", unsafe_allow_html=True)
|
| 43 |
+
|
| 44 |
|
| 45 |
# Create temporary directory
|
| 46 |
|
|
|
|
| 111 |
st.session_state["step"] = 1
|
| 112 |
else:
|
| 113 |
st.session_state["step"] = 0
|
| 114 |
+
st.markdown(
|
| 115 |
+
"Please upload a file first (or **generate** some random data to try the function)."
|
| 116 |
+
)
|
| 117 |
|
| 118 |
if st.session_state["step"] >= 1:
|
| 119 |
# Read and store (tmp) data
|
|
|
|
| 132 |
elif input_choice == "Upload counts":
|
| 133 |
with st.form(key="data_form"):
|
| 134 |
upload_counts_text()
|
| 135 |
+
data_path = st.file_uploader("Upload your counts", type=["csv"])
|
| 136 |
+
if st.form_submit_button(label="Use counts"):
|
| 137 |
if data_path:
|
| 138 |
st.session_state["step"] = 1
|
| 139 |
else:
|
|
|
|
| 149 |
prediction_col = st.selectbox(
|
| 150 |
"Predictions column", options=list(df.columns)
|
| 151 |
)
|
| 152 |
+
n_col = st.selectbox("Counts column", options=list(df.columns))
|
|
|
|
|
|
|
| 153 |
|
| 154 |
if st.form_submit_button(label="Set columns"):
|
| 155 |
st.session_state["step"] = 2
|
|
|
|
| 294 |
col1, col2, col3 = st.columns([2, 2, 2])
|
| 295 |
with col2:
|
| 296 |
st.write(df.head(5))
|
| 297 |
+
st.write(f"{df.shape} (Showing first 5 rows)")
|
| 298 |
|
| 299 |
else:
|
| 300 |
st.session_state["entered_counts"].to_csv(data_store_path)
|
|
|
|
| 322 |
"of another class is excluded.",
|
| 323 |
)
|
| 324 |
with col2:
|
| 325 |
+
if (
|
| 326 |
+
st.session_state["input_type"] == "data"
|
| 327 |
+
and predictions_are_probabilities
|
| 328 |
+
):
|
| 329 |
prob_of_class = st.selectbox(
|
| 330 |
"Probabilities are of (not working)",
|
| 331 |
options=st.session_state["classes"],
|
|
|
|
| 412 |
with col3:
|
| 413 |
dpi = st.number_input("DPI (not working)", value=320)
|
| 414 |
|
| 415 |
+
if st.form_submit_button(label="Generate plot"):
|
| 416 |
st.session_state["step"] = 3
|
| 417 |
|
| 418 |
if st.session_state["step"] >= 3:
|
cvms_version.R
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
print(packageVersion("cvms"))
|
|
|
|
| 1 |
+
print(packageVersion("cvms"))
|
generate_data.R
CHANGED
|
@@ -3,25 +3,32 @@ library(optparse)
|
|
| 3 |
library(cvms)
|
| 4 |
|
| 5 |
option_list <- list(
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
)
|
| 16 |
-
|
| 17 |
-
opt_parser <- OptionParser(option_list=option_list)
|
| 18 |
opt <- parse_args(opt_parser)
|
| 19 |
|
| 20 |
print(opt)
|
| 21 |
|
| 22 |
# Set seed if given
|
| 23 |
-
if (!is.null(opt$seed)){
|
| 24 |
-
|
| 25 |
}
|
| 26 |
|
| 27 |
# Make fairly certain predictions
|
|
@@ -31,16 +38,16 @@ rcertain <- function(n) {
|
|
| 31 |
|
| 32 |
# Generate data
|
| 33 |
data <- cvms::multiclass_probability_tibble(
|
| 34 |
-
num_classes=opt$num_classes,
|
| 35 |
-
num_observations=opt$num_observations,
|
| 36 |
apply_softmax = TRUE,
|
| 37 |
FUN = rcertain,
|
| 38 |
class_name = "c",
|
| 39 |
add_predicted_classes = TRUE,
|
| 40 |
add_targets = TRUE
|
| 41 |
-
)
|
| 42 |
|
| 43 |
data <- data[, c("Predicted Class", "Target")]
|
| 44 |
|
| 45 |
# Write to disk
|
| 46 |
-
write.csv(data, file = opt$out_path, row.names=FALSE)
|
|
|
|
| 3 |
library(cvms)
|
| 4 |
|
| 5 |
option_list <- list(
|
| 6 |
+
make_option(c("--out_path"),
|
| 7 |
+
type = "character",
|
| 8 |
+
help = "Path to save data at."
|
| 9 |
+
),
|
| 10 |
+
make_option(c("--num_classes"),
|
| 11 |
+
type = "integer",
|
| 12 |
+
help = "Number of classes."
|
| 13 |
+
),
|
| 14 |
+
make_option(c("--num_observations"),
|
| 15 |
+
type = "integer",
|
| 16 |
+
help = "Number of observations."
|
| 17 |
+
),
|
| 18 |
+
make_option(c("--seed"),
|
| 19 |
+
type = "integer",
|
| 20 |
+
help = "Random seed for reproducible data generation."
|
| 21 |
+
)
|
| 22 |
)
|
| 23 |
+
|
| 24 |
+
opt_parser <- OptionParser(option_list = option_list)
|
| 25 |
opt <- parse_args(opt_parser)
|
| 26 |
|
| 27 |
print(opt)
|
| 28 |
|
| 29 |
# Set seed if given
|
| 30 |
+
if (!is.null(opt$seed)) {
|
| 31 |
+
set.seed(opt$seed)
|
| 32 |
}
|
| 33 |
|
| 34 |
# Make fairly certain predictions
|
|
|
|
| 38 |
|
| 39 |
# Generate data
|
| 40 |
data <- cvms::multiclass_probability_tibble(
|
| 41 |
+
num_classes = opt$num_classes,
|
| 42 |
+
num_observations = opt$num_observations,
|
| 43 |
apply_softmax = TRUE,
|
| 44 |
FUN = rcertain,
|
| 45 |
class_name = "c",
|
| 46 |
add_predicted_classes = TRUE,
|
| 47 |
add_targets = TRUE
|
| 48 |
+
)
|
| 49 |
|
| 50 |
data <- data[, c("Predicted Class", "Target")]
|
| 51 |
|
| 52 |
# Write to disk
|
| 53 |
+
write.csv(data, file = opt$out_path, row.names = FALSE)
|
plot.R
CHANGED
|
@@ -3,59 +3,107 @@ library(optparse)
|
|
| 3 |
library(cvms)
|
| 4 |
library(dplyr)
|
| 5 |
library(ggplot2)
|
| 6 |
-
|
| 7 |
option_list <- list(
|
| 8 |
-
make_option(c("--data_path"),
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
make_option(c("--
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
make_option(c("--
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
make_option(c("--
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
make_option(c("--
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
make_option(c("--
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
make_option(c("--
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
make_option(c("--
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
make_option(c("--
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
make_option(c("--
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
make_option(c("--
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
make_option(c("--
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
)
|
| 57 |
|
| 58 |
-
opt_parser <- OptionParser(option_list=option_list)
|
| 59 |
opt <- parse_args(opt_parser)
|
| 60 |
|
| 61 |
print(opt)
|
|
@@ -66,129 +114,144 @@ data_are_counts <- opt$data_are_counts
|
|
| 66 |
target_col <- stringr::str_squish(opt$target_col)
|
| 67 |
target_col <- stringr::str_replace_all(target_col, " ", ".")
|
| 68 |
prediction_col <- stringr::str_squish(opt$prediction_col)
|
| 69 |
-
prediction_col
|
| 70 |
|
| 71 |
n_col <- NULL
|
| 72 |
-
if (!is.null(opt$n_col)){
|
| 73 |
n_col <- stringr::str_squish(opt$n_col)
|
| 74 |
-
n_col
|
| 75 |
}
|
| 76 |
|
| 77 |
# Read and prepare data frame
|
| 78 |
-
df <- tryCatch(
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
print(df)
|
| 86 |
|
| 87 |
df <- dplyr::as_tibble(df)
|
| 88 |
print(df)
|
| 89 |
df[[target_col]] <- as.character(df[[target_col]])
|
| 90 |
|
| 91 |
-
if (isTRUE(data_are_counts)){
|
| 92 |
df[[prediction_col]] <- as.character(df[[prediction_col]])
|
| 93 |
}
|
| 94 |
|
| 95 |
-
# Predictions can be either probabilities or
|
| 96 |
# hard class predictions
|
| 97 |
-
if (is.integer(df[[prediction_col]]) || !is.numeric(df[[prediction_col]])){
|
| 98 |
-
all_present_classes <- sort(
|
| 99 |
-
c(
|
| 100 |
-
|
|
|
|
| 101 |
)
|
| 102 |
)
|
| 103 |
} else {
|
| 104 |
-
all_present_classes <- sort(
|
| 105 |
unique(df[[target_col]])
|
| 106 |
)
|
| 107 |
}
|
| 108 |
|
| 109 |
|
| 110 |
-
if (!is.null(opt$classes)){
|
| 111 |
-
classes <- as.character(
|
|
|
|
|
|
|
|
|
|
| 112 |
} else {
|
| 113 |
classes <- all_present_classes
|
| 114 |
}
|
| 115 |
-
print(paste0("Selected Classes: ", paste0(classes, collapse=", ")))
|
| 116 |
|
| 117 |
-
if (!isTRUE(data_are_counts)){
|
| 118 |
-
# We remove the unwanted classes from the confusion matrix
|
| 119 |
# (easier - possibly slower in edge cases)
|
| 120 |
family <- ifelse(length(all_present_classes) == 2, "binomial", "multinomial")
|
| 121 |
print(df)
|
| 122 |
|
| 123 |
# TODO : use prob_of_class to ensure probabilities are interpreted correctly!!
|
| 124 |
# Might need to invert them to get it to work!
|
| 125 |
-
evaluation <- tryCatch(
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
confusion_matrix <- evaluation[["Confusion Matrix"]][[1]]
|
| 140 |
-
|
| 141 |
} else {
|
| 142 |
confusion_matrix <- dplyr::rename(
|
| 143 |
-
df,
|
| 144 |
Target = !!target_col,
|
| 145 |
Prediction = !!prediction_col,
|
| 146 |
N = !!n_col
|
| 147 |
)
|
| 148 |
}
|
| 149 |
|
| 150 |
-
confusion_matrix
|
| 151 |
confusion_matrix,
|
| 152 |
Prediction %in% classes,
|
| 153 |
Target %in% classes
|
| 154 |
)
|
| 155 |
|
| 156 |
|
| 157 |
-
confusion_matrix_plot <- tryCatch(
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
library(cvms)
|
| 4 |
library(dplyr)
|
| 5 |
library(ggplot2)
|
| 6 |
+
|
| 7 |
option_list <- list(
|
| 8 |
+
make_option(c("--data_path"),
|
| 9 |
+
type = "character",
|
| 10 |
+
help = "Path to data file (.csv)."
|
| 11 |
+
),
|
| 12 |
+
make_option(c("--out_path"),
|
| 13 |
+
type = "character",
|
| 14 |
+
help = "Path to save confusion matrix plot at."
|
| 15 |
+
),
|
| 16 |
+
make_option(c("--data_are_counts"),
|
| 17 |
+
action = "store_true", default = FALSE,
|
| 18 |
+
help = "Indicates that `--data_path` contains counts, not predictions."
|
| 19 |
+
),
|
| 20 |
+
make_option(c("--target_col"),
|
| 21 |
+
type = "character",
|
| 22 |
+
help = "Target column"
|
| 23 |
+
),
|
| 24 |
+
make_option(c("--prediction_col"),
|
| 25 |
+
type = "character",
|
| 26 |
+
help = "Prediction column"
|
| 27 |
+
),
|
| 28 |
+
make_option(c("--n_col"),
|
| 29 |
+
type = "character",
|
| 30 |
+
help = "Count column (when `--data_are_counts`)."
|
| 31 |
+
),
|
| 32 |
+
make_option(c("--classes"),
|
| 33 |
+
type = "character",
|
| 34 |
+
help = "Comma-separated class names. Only these classes will be used - in the specified order."
|
| 35 |
+
),
|
| 36 |
+
make_option(c("--prob_of_class"),
|
| 37 |
+
type = "character",
|
| 38 |
+
help = "Name of class that probabilities are of."
|
| 39 |
+
),
|
| 40 |
+
make_option(c("--palette"),
|
| 41 |
+
type = "character",
|
| 42 |
+
help = "Color palette."
|
| 43 |
+
),
|
| 44 |
+
make_option(c("--width"),
|
| 45 |
+
type = "integer",
|
| 46 |
+
help = "Width of plot in pixels."
|
| 47 |
+
),
|
| 48 |
+
make_option(c("--height"),
|
| 49 |
+
type = "integer",
|
| 50 |
+
help = "Height of plot in pixels."
|
| 51 |
+
),
|
| 52 |
+
make_option(c("--dpi"),
|
| 53 |
+
type = "integer",
|
| 54 |
+
help = "DPI of plot."
|
| 55 |
+
),
|
| 56 |
+
make_option(c("--add_sums"),
|
| 57 |
+
action = "store_true", default = FALSE,
|
| 58 |
+
help = "Whether to add sum tiles."
|
| 59 |
+
),
|
| 60 |
+
make_option(c("--add_counts"),
|
| 61 |
+
action = "store_true", default = FALSE,
|
| 62 |
+
help = "Whether to add counts."
|
| 63 |
+
),
|
| 64 |
+
make_option(c("--add_normalized"),
|
| 65 |
+
action = "store_true", default = FALSE,
|
| 66 |
+
help = "Whether to add normalized counts (i.e. percentages)."
|
| 67 |
+
),
|
| 68 |
+
make_option(c("--add_row_percentages"),
|
| 69 |
+
action = "store_true", default = FALSE,
|
| 70 |
+
help = "Whether to add row percentages."
|
| 71 |
+
),
|
| 72 |
+
make_option(c("--add_col_percentages"),
|
| 73 |
+
action = "store_true", default = FALSE,
|
| 74 |
+
help = "Whether to add column percentages."
|
| 75 |
+
),
|
| 76 |
+
make_option(c("--add_zero_percentages"),
|
| 77 |
+
action = "store_true", default = FALSE,
|
| 78 |
+
help = "Whether to add percentages to zero-tiles."
|
| 79 |
+
),
|
| 80 |
+
make_option(c("--add_zero_text"),
|
| 81 |
+
action = "store_true", default = FALSE,
|
| 82 |
+
help = "Whether to add text to zero-tiles."
|
| 83 |
+
),
|
| 84 |
+
make_option(c("--add_zero_shading"),
|
| 85 |
+
action = "store_true", default = FALSE,
|
| 86 |
+
help = "Whether to add shading to zero-tiles."
|
| 87 |
+
),
|
| 88 |
+
make_option(c("--add_arrows"),
|
| 89 |
+
action = "store_true", default = FALSE,
|
| 90 |
+
help = "Whether to add arrows to row/sum percentages. Requires additional packages."
|
| 91 |
+
),
|
| 92 |
+
make_option(c("--counts_on_top"),
|
| 93 |
+
action = "store_true", default = FALSE,
|
| 94 |
+
help = "Whether to have the counts on top and normalized counts below."
|
| 95 |
+
),
|
| 96 |
+
make_option(c("--diag_percentages_only"),
|
| 97 |
+
action = "store_true", default = FALSE,
|
| 98 |
+
help = "Whether to only show diagonal row/column percentages."
|
| 99 |
+
),
|
| 100 |
+
make_option(c("--digits"),
|
| 101 |
+
type = "integer",
|
| 102 |
+
help = "Number of digits to show for percentages."
|
| 103 |
+
)
|
| 104 |
)
|
| 105 |
|
| 106 |
+
opt_parser <- OptionParser(option_list = option_list)
|
| 107 |
opt <- parse_args(opt_parser)
|
| 108 |
|
| 109 |
print(opt)
|
|
|
|
| 114 |
target_col <- stringr::str_squish(opt$target_col)
|
| 115 |
target_col <- stringr::str_replace_all(target_col, " ", ".")
|
| 116 |
prediction_col <- stringr::str_squish(opt$prediction_col)
|
| 117 |
+
prediction_col <- stringr::str_replace_all(prediction_col, " ", ".")
|
| 118 |
|
| 119 |
n_col <- NULL
|
| 120 |
+
if (!is.null(opt$n_col)) {
|
| 121 |
n_col <- stringr::str_squish(opt$n_col)
|
| 122 |
+
n_col <- stringr::str_replace_all(n_col, " ", ".")
|
| 123 |
}
|
| 124 |
|
| 125 |
# Read and prepare data frame
|
| 126 |
+
df <- tryCatch(
|
| 127 |
+
{
|
| 128 |
+
read.csv(opt$data_path)
|
| 129 |
+
},
|
| 130 |
+
error = function(e) {
|
| 131 |
+
print(paste0("Failed to read data from ", opt$data_path))
|
| 132 |
+
print(e)
|
| 133 |
+
stop(e)
|
| 134 |
+
}
|
| 135 |
+
)
|
| 136 |
print(df)
|
| 137 |
|
| 138 |
df <- dplyr::as_tibble(df)
|
| 139 |
print(df)
|
| 140 |
df[[target_col]] <- as.character(df[[target_col]])
|
| 141 |
|
| 142 |
+
if (isTRUE(data_are_counts)) {
|
| 143 |
df[[prediction_col]] <- as.character(df[[prediction_col]])
|
| 144 |
}
|
| 145 |
|
| 146 |
+
# Predictions can be either probabilities or
|
| 147 |
# hard class predictions
|
| 148 |
+
if (is.integer(df[[prediction_col]]) || !is.numeric(df[[prediction_col]])) {
|
| 149 |
+
all_present_classes <- sort(
|
| 150 |
+
c(
|
| 151 |
+
unique(df[[target_col]]),
|
| 152 |
+
unique(df[[prediction_col]])
|
| 153 |
)
|
| 154 |
)
|
| 155 |
} else {
|
| 156 |
+
all_present_classes <- sort(
|
| 157 |
unique(df[[target_col]])
|
| 158 |
)
|
| 159 |
}
|
| 160 |
|
| 161 |
|
| 162 |
+
if (!is.null(opt$classes)) {
|
| 163 |
+
classes <- as.character(
|
| 164 |
+
unlist(strsplit(opt$classes, "[,:]")),
|
| 165 |
+
recursive = TRUE
|
| 166 |
+
)
|
| 167 |
} else {
|
| 168 |
classes <- all_present_classes
|
| 169 |
}
|
| 170 |
+
print(paste0("Selected Classes: ", paste0(classes, collapse = ", ")))
|
| 171 |
|
| 172 |
+
if (!isTRUE(data_are_counts)) {
|
| 173 |
+
# We remove the unwanted classes from the confusion matrix
|
| 174 |
# (easier - possibly slower in edge cases)
|
| 175 |
family <- ifelse(length(all_present_classes) == 2, "binomial", "multinomial")
|
| 176 |
print(df)
|
| 177 |
|
| 178 |
# TODO : use prob_of_class to ensure probabilities are interpreted correctly!!
|
| 179 |
# Might need to invert them to get it to work!
|
| 180 |
+
evaluation <- tryCatch(
|
| 181 |
+
{
|
| 182 |
+
cvms::evaluate(
|
| 183 |
+
data = df,
|
| 184 |
+
target_col = target_col,
|
| 185 |
+
prediction_cols = prediction_col,
|
| 186 |
+
type = family,
|
| 187 |
+
)
|
| 188 |
+
},
|
| 189 |
+
error = function(e) {
|
| 190 |
+
print("Failed to evaluate data.")
|
| 191 |
+
print(head(df, 5))
|
| 192 |
+
print(e)
|
| 193 |
+
stop(e)
|
| 194 |
+
}
|
| 195 |
+
)
|
| 196 |
|
| 197 |
confusion_matrix <- evaluation[["Confusion Matrix"]][[1]]
|
|
|
|
| 198 |
} else {
|
| 199 |
confusion_matrix <- dplyr::rename(
|
| 200 |
+
df,
|
| 201 |
Target = !!target_col,
|
| 202 |
Prediction = !!prediction_col,
|
| 203 |
N = !!n_col
|
| 204 |
)
|
| 205 |
}
|
| 206 |
|
| 207 |
+
confusion_matrix <- dplyr::filter(
|
| 208 |
confusion_matrix,
|
| 209 |
Prediction %in% classes,
|
| 210 |
Target %in% classes
|
| 211 |
)
|
| 212 |
|
| 213 |
|
| 214 |
+
confusion_matrix_plot <- tryCatch(
|
| 215 |
+
{
|
| 216 |
+
cvms::plot_confusion_matrix(
|
| 217 |
+
confusion_matrix,
|
| 218 |
+
class_order = classes,
|
| 219 |
+
add_sums = opt$add_sums,
|
| 220 |
+
add_counts = opt$add_counts,
|
| 221 |
+
add_normalized = opt$add_normalized,
|
| 222 |
+
add_row_percentages = opt$add_row_percentages,
|
| 223 |
+
add_col_percentages = opt$add_col_percentages,
|
| 224 |
+
rm_zero_percentages = !opt$add_zero_percentages,
|
| 225 |
+
rm_zero_text = !opt$add_zero_text,
|
| 226 |
+
add_zero_shading = opt$add_zero_shading,
|
| 227 |
+
add_arrows = opt$add_arrows,
|
| 228 |
+
counts_on_top = opt$counts_on_top,
|
| 229 |
+
diag_percentages_only = opt$diag_percentages_only,
|
| 230 |
+
digits = as.integer(opt$digits),
|
| 231 |
+
palette = opt$palette
|
| 232 |
+
)
|
| 233 |
+
},
|
| 234 |
+
error = function(e) {
|
| 235 |
+
print("Failed to create plot from confusion matrix.")
|
| 236 |
+
print(confusion_matrix)
|
| 237 |
+
print(e)
|
| 238 |
+
stop(e)
|
| 239 |
+
}
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
+
tryCatch(
|
| 243 |
+
{
|
| 244 |
+
ggplot2::ggsave(
|
| 245 |
+
opt$out_path,
|
| 246 |
+
width = opt$width,
|
| 247 |
+
height = opt$height,
|
| 248 |
+
dpi = opt$dpi,
|
| 249 |
+
units = "px"
|
| 250 |
+
)
|
| 251 |
+
},
|
| 252 |
+
error = function(e) {
|
| 253 |
+
print(paste0("Failed to ggsave plot to: ", opt$out_path))
|
| 254 |
+
print(e)
|
| 255 |
+
stop(e)
|
| 256 |
+
}
|
| 257 |
+
)
|
text_sections.py
CHANGED
|
@@ -23,34 +23,59 @@ def intro_text():
|
|
| 23 |
col1, col2 = st.columns([8, 2])
|
| 24 |
with col1:
|
| 25 |
st.title("Plot Confusion Matrix")
|
| 26 |
-
st.
|
| 27 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
)
|
| 29 |
with col2:
|
| 30 |
st.image(
|
| 31 |
"https://github.com/LudvigOlsen/cvms/raw/master/man/figures/cvms_logo_242x280_250dpi.png",
|
| 32 |
width=125,
|
| 33 |
)
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
st.write(
|
| 36 |
"The plot is created with the [**cvms**](https://github.com/LudvigOlsen/cvms) R package "
|
| 37 |
f"(v/{get_cvms_version()}, LR Olsen & HB Zachariae, 2019)."
|
| 38 |
-
)
|
| 39 |
|
| 40 |
-
st.
|
| 41 |
-
|
| 42 |
-
"
|
| 43 |
-
"While we, the authors,
|
| 44 |
-
"*no guarantees* about the privacy of your data (
|
| 45 |
"Please do not upload sensitive data. The application "
|
| 46 |
-
"only requires
|
|
|
|
|
|
|
| 47 |
)
|
| 48 |
|
| 49 |
|
| 50 |
def generate_data_text():
|
| 51 |
st.subheader("Generate data")
|
| 52 |
st.write(
|
| 53 |
-
"
|
| 54 |
"Select a number of classes and observations, and you're ready to go! "
|
| 55 |
)
|
| 56 |
|
|
@@ -77,6 +102,7 @@ def upload_counts_text():
|
|
| 77 |
"See example of such a .csv file [here] (TODO). "
|
| 78 |
)
|
| 79 |
|
|
|
|
| 80 |
def upload_predictions_text():
|
| 81 |
st.subheader("Upload your predictions")
|
| 82 |
st.markdown(
|
|
@@ -85,7 +111,7 @@ def upload_predictions_text():
|
|
| 85 |
"Targets will be converted into strings. \n\n"
|
| 86 |
"2) A `prediction` column. \n"
|
| 87 |
"Predictions can be probabilities (binary classification only) or class predictions. \n\n"
|
| 88 |
-
"Other columns are currently ignored. \n\n"
|
| 89 |
"You will have the option to select the names of these two columns, so don't "
|
| 90 |
"worry too much about the column names in the uploaded data."
|
| 91 |
)
|
|
|
|
| 23 |
col1, col2 = st.columns([8, 2])
|
| 24 |
with col1:
|
| 25 |
st.title("Plot Confusion Matrix")
|
| 26 |
+
st.markdown(
|
| 27 |
+
"A confusion matrix plot is a great tool for inspecting your "
|
| 28 |
+
"machine learning model's performance on a classification task. "
|
| 29 |
+
"This application enables you to plot a confusion matrix on your own data, "
|
| 30 |
+
"**without a single line of code**. \n\n"
|
| 31 |
+
"It's designed for high flexibility AND quick results with good default settings.\n\n"
|
| 32 |
)
|
| 33 |
with col2:
|
| 34 |
st.image(
|
| 35 |
"https://github.com/LudvigOlsen/cvms/raw/master/man/figures/cvms_logo_242x280_250dpi.png",
|
| 36 |
width=125,
|
| 37 |
)
|
| 38 |
+
st.markdown("""---""")
|
| 39 |
+
col1, col2 = st.columns(2)
|
| 40 |
+
with col1:
|
| 41 |
+
st.subheader("Have your data ready?")
|
| 42 |
+
st.markdown( # TODO: Make A,B, etc. icons
|
| 43 |
+
"Upload a csv file with either: \n\n"
|
| 44 |
+
"A) **Targets** and **predictions**. \n\n"
|
| 45 |
+
"B) Existing confusion matrix **counts**. \n\n"
|
| 46 |
+
"--> Specify the columns to use.\n\n"
|
| 47 |
+
"--> Press **Generate plot**.\n\n"
|
| 48 |
+
)
|
| 49 |
+
with col2:
|
| 50 |
+
st.subheader("No data to upload?")
|
| 51 |
+
st.markdown(
|
| 52 |
+
"No worries! Either: \n\n"
|
| 53 |
+
"C) **Input** your counts directly! \n\n"
|
| 54 |
+
"D) **Generate** some data with **very** easy controls! \n\n"
|
| 55 |
+
"--> Press **Generate plot**.\n\n"
|
| 56 |
+
)
|
| 57 |
+
st.markdown("""---""")
|
| 58 |
st.write(
|
| 59 |
"The plot is created with the [**cvms**](https://github.com/LudvigOlsen/cvms) R package "
|
| 60 |
f"(v/{get_cvms_version()}, LR Olsen & HB Zachariae, 2019)."
|
| 61 |
+
) # TODO Add citation stuff
|
| 62 |
|
| 63 |
+
st.markdown(
|
| 64 |
+
'<p class="small-font">'
|
| 65 |
+
"DATA PRIVACY: For technical reasons, the uploaded data is temporarily stored "
|
| 66 |
+
"on the server. While we, the authors, won't access your data, we make "
|
| 67 |
+
"*no guarantees* about the privacy of your data (not our servers). "
|
| 68 |
"Please do not upload sensitive data. The application "
|
| 69 |
+
"only requires either predictions and targets or counts. "
|
| 70 |
+
"</p>",
|
| 71 |
+
unsafe_allow_html=True,
|
| 72 |
)
|
| 73 |
|
| 74 |
|
| 75 |
def generate_data_text():
|
| 76 |
st.subheader("Generate data")
|
| 77 |
st.write(
|
| 78 |
+
"Quickly try the application by generating a dataset with targets and predictions. "
|
| 79 |
"Select a number of classes and observations, and you're ready to go! "
|
| 80 |
)
|
| 81 |
|
|
|
|
| 102 |
"See example of such a .csv file [here] (TODO). "
|
| 103 |
)
|
| 104 |
|
| 105 |
+
|
| 106 |
def upload_predictions_text():
|
| 107 |
st.subheader("Upload your predictions")
|
| 108 |
st.markdown(
|
|
|
|
| 111 |
"Targets will be converted into strings. \n\n"
|
| 112 |
"2) A `prediction` column. \n"
|
| 113 |
"Predictions can be probabilities (binary classification only) or class predictions. \n\n"
|
| 114 |
+
"Other columns are currently ignored. \n\n"
|
| 115 |
"You will have the option to select the names of these two columns, so don't "
|
| 116 |
"worry too much about the column names in the uploaded data."
|
| 117 |
)
|