Spaces:
Runtime error
Runtime error
misc updates
Browse files
app.py
CHANGED
|
@@ -7,8 +7,8 @@ from io import StringIO
|
|
| 7 |
|
| 8 |
import numpy as np
|
| 9 |
import streamlit as st
|
| 10 |
-
|
| 11 |
import textdescriptives as td
|
|
|
|
| 12 |
from data_viewer import DataViewer
|
| 13 |
from options import (
|
| 14 |
all_model_size_options_pretty_to_short,
|
|
@@ -27,17 +27,23 @@ with col1:
|
|
| 27 |
st.title("Extract Text Statistics")
|
| 28 |
with col2:
|
| 29 |
st.image(
|
| 30 |
-
"https://github.com/HLasse/TextDescriptives/raw/main/docs/_static/icon.png"
|
|
|
|
| 31 |
)
|
| 32 |
|
| 33 |
st.write(
|
| 34 |
"Calculate a large variety of statistics from text via the "
|
| 35 |
"[**TextDescriptives**](https://github.com/HLasse/TextDescriptives) python package "
|
| 36 |
-
f"(v/{td.__version__})
|
| 37 |
"Includes descriptive statistics and metrics related to readability, "
|
| 38 |
"information theory, text coherence and text quality."
|
| 39 |
)
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
st.caption(
|
| 42 |
"Hansen, L., Olsen, L. R., & Enevoldsen, K. (2023). TextDescriptives: A Python package for "
|
| 43 |
"calculating a large variety of statistics from text. "
|
|
@@ -69,15 +75,14 @@ with st.form(key="settings_form"):
|
|
| 69 |
string_data = StringIO(uploaded_file.getvalue().decode("utf-8")).read()
|
| 70 |
|
| 71 |
else:
|
| 72 |
-
default_text = """
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
Feeling bad about yourself - or that you are a failure or have let yourself or your family down?"""
|
| 78 |
|
| 79 |
string_data = st.text_area(
|
| 80 |
-
label="Enter text", value=default_text, height=
|
| 81 |
)
|
| 82 |
|
| 83 |
# Row of selectors
|
|
@@ -114,6 +119,7 @@ Feeling bad about yourself - or that you are a failure or have let yourself or y
|
|
| 114 |
"See the [**documentation**](https://hlasse.github.io/TextDescriptives/) for "
|
| 115 |
"information on the available metrics."
|
| 116 |
)
|
|
|
|
| 117 |
# This shouldn't happen but better safe than sorry
|
| 118 |
if isinstance(metrics, list) and not metrics:
|
| 119 |
metrics = None
|
|
@@ -180,7 +186,9 @@ import textdescriptives as td
|
|
| 180 |
|
| 181 |
# Given a string of text and the settings
|
| 182 |
text = "..."
|
| 183 |
-
|
|
|
|
|
|
|
| 184 |
split_by_newline = True
|
| 185 |
|
| 186 |
# Remove whitespace from both ends of the string
|
|
@@ -199,7 +207,9 @@ lines = [l for l in lines if l]
|
|
| 199 |
# Extract metrics for each line
|
| 200 |
extracted_metrics = td.extract_metrics(
|
| 201 |
text=lines,
|
| 202 |
-
|
|
|
|
|
|
|
| 203 |
)
|
| 204 |
|
| 205 |
""",
|
|
|
|
| 7 |
|
| 8 |
import numpy as np
|
| 9 |
import streamlit as st
|
|
|
|
| 10 |
import textdescriptives as td
|
| 11 |
+
|
| 12 |
from data_viewer import DataViewer
|
| 13 |
from options import (
|
| 14 |
all_model_size_options_pretty_to_short,
|
|
|
|
| 27 |
st.title("Extract Text Statistics")
|
| 28 |
with col2:
|
| 29 |
st.image(
|
| 30 |
+
"https://github.com/HLasse/TextDescriptives/raw/main/docs/_static/icon.png",
|
| 31 |
+
width=125
|
| 32 |
)
|
| 33 |
|
| 34 |
st.write(
|
| 35 |
"Calculate a large variety of statistics from text via the "
|
| 36 |
"[**TextDescriptives**](https://github.com/HLasse/TextDescriptives) python package "
|
| 37 |
+
f"(v/{td.__version__}) and download the results as a .csv file. "
|
| 38 |
"Includes descriptive statistics and metrics related to readability, "
|
| 39 |
"information theory, text coherence and text quality."
|
| 40 |
)
|
| 41 |
|
| 42 |
+
st.write(
|
| 43 |
+
"The source code for this application can be found on [**GitHub**](https://github.com/HLasse/TextDescriptives_app). "
|
| 44 |
+
"If you have feedback, please open an [issue](https://github.com/HLasse/textdescriptives_app/issues)."
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
st.caption(
|
| 48 |
"Hansen, L., Olsen, L. R., & Enevoldsen, K. (2023). TextDescriptives: A Python package for "
|
| 49 |
"calculating a large variety of statistics from text. "
|
|
|
|
| 75 |
string_data = StringIO(uploaded_file.getvalue().decode("utf-8")).read()
|
| 76 |
|
| 77 |
else:
|
| 78 |
+
default_text = """Hello, morning dew. The grass whispers low.
|
| 79 |
+
I'm here to dance. The gentle breeze does show.
|
| 80 |
+
Good morning, world. The birds sing in delight.
|
| 81 |
+
Let's spread our wings. The butterflies take flight.
|
| 82 |
+
Nature's chorus sings, a symphony of light."""
|
|
|
|
| 83 |
|
| 84 |
string_data = st.text_area(
|
| 85 |
+
label="Enter text", value=default_text, height=145, max_chars=None
|
| 86 |
)
|
| 87 |
|
| 88 |
# Row of selectors
|
|
|
|
| 119 |
"See the [**documentation**](https://hlasse.github.io/TextDescriptives/) for "
|
| 120 |
"information on the available metrics."
|
| 121 |
)
|
| 122 |
+
|
| 123 |
# This shouldn't happen but better safe than sorry
|
| 124 |
if isinstance(metrics, list) and not metrics:
|
| 125 |
metrics = None
|
|
|
|
| 186 |
|
| 187 |
# Given a string of text and the settings
|
| 188 |
text = "..."
|
| 189 |
+
language = "..."
|
| 190 |
+
model_size = "..."
|
| 191 |
+
metrics = [...]
|
| 192 |
split_by_newline = True
|
| 193 |
|
| 194 |
# Remove whitespace from both ends of the string
|
|
|
|
| 207 |
# Extract metrics for each line
|
| 208 |
extracted_metrics = td.extract_metrics(
|
| 209 |
text=lines,
|
| 210 |
+
lang=language,
|
| 211 |
+
spacy_model_size=model_size,
|
| 212 |
+
metrics=metrics
|
| 213 |
)
|
| 214 |
|
| 215 |
""",
|