Skip to content

Commit 26dff18

Browse files
committed
Add Spanish as an example
Adding a new language without NLP support now only requires filling in these pieces: - define a list of month names to support date recognition - add it to joex's Dockerfile so it is available to Tesseract - update the Solr migration/field definitions - update the Elm file so it shows up on the client
1 parent 360cad3 commit 26dff18

8 files changed

Lines changed: 49 additions & 2 deletions

File tree

docker/joex-base.dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ RUN apk add --no-cache openjdk11-jre \
1616
tesseract-ocr-data-deu \
1717
tesseract-ocr-data-fra \
1818
tesseract-ocr-data-ita \
19+
tesseract-ocr-data-spa \
1920
unpaper \
2021
wkhtmltopdf \
2122
libreoffice \

modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ object DateFind {
6565
case Language.German => p1.or(p0).or(p2)
6666
case Language.French => p1.or(p0).or(p2)
6767
case Language.Italian => p1.or(p0).or(p2)
68+
case Language.Spanish => p1.or(p0).or(p2)
6869
}
6970
p.read(parts) match {
7071
case Result.Success(sds, _) =>

modules/analysis/src/main/scala/docspell/analysis/date/MonthName.scala

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ object MonthName {
2222
french
2323
case Language.Italian =>
2424
italian
25+
case Language.Spanish =>
26+
spanish
2527
}
2628

2729
private val numbers = List(
@@ -98,4 +100,19 @@ object MonthName {
98100
List("nov", "novembre"),
99101
List("dic", "dicembre")
100102
)
103+
104+
/** Spanish month names, one entry per month in calendar order (January first).
  *
  * Each entry lists the abbreviated form followed by the full name, so that
  * both spellings are available when looking up a month.
  */
private val spanish = List(
  List("ene", "enero"),
  List("feb", "febrero"),
  List("mar", "marzo"),
  List("abr", "abril"),
  List("may", "mayo"),
  // June and July previously listed only the abbreviation; the full names
  // are included like for every other month.
  List("jun", "junio"),
  List("jul", "julio"),
  List("ago", "agosto"),
  List("sep", "septiembre"),
  List("oct", "octubre"),
  List("nov", "noviembre"),
  List("dic", "diciembre")
)
101118
}

modules/common/src/main/scala/docspell/common/Language.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,12 @@ object Language {
4747
val iso3 = "ita"
4848
}
4949

50-
val all: List[Language] = List(German, English, French, Italian)
50+
/** The Spanish language, identified by the two-letter code "es" (`iso2`)
  * and the three-letter code "spa" (`iso3`).
  */
case object Spanish extends Language {
51+
val iso2 = "es"
52+
val iso3 = "spa"
53+
}
54+
55+
val all: List[Language] = List(German, English, French, Italian, Spanish)
5156

5257
def fromString(str: String): Either[String, Language] = {
5358
val lang = str.toLowerCase

modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ object Field {
2525
val content_en = Field("content_en")
2626
val content_fr = Field("content_fr")
2727
val content_it = Field("content_it")
28+
val content_es = Field("content_es")
2829
val itemName = Field("itemName")
2930
val itemNotes = Field("itemNotes")
3031
val folderId = Field("folder")
@@ -39,6 +40,8 @@ object Field {
3940
Field.content_fr
4041
case Language.Italian =>
4142
Field.content_it
43+
case Language.Spanish =>
44+
Field.content_es
4245
}
4346

4447
implicit val jsonEncoder: Encoder[Field] =

modules/fts-solr/src/main/scala/docspell/ftssolr/SolrQuery.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ object SolrQuery {
4141
Field.content_en,
4242
Field.content_fr,
4343
Field.content_it,
44+
Field.content_es,
4445
Field.itemName,
4546
Field.itemNotes,
4647
Field.attachmentName

modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ object SolrSetup {
6969
solrEngine,
7070
"Add content_it field",
7171
addContentItField.map(_ => FtsMigration.Result.reIndexAll)
72+
),
73+
FtsMigration[F](
74+
8,
75+
solrEngine,
76+
"Add content_es field",
77+
addTextField(Some(Language.Spanish))(Field.content_es).map(_ =>
78+
FtsMigration.Result.reIndexAll
79+
)
7280
)
7381
)
7482

modules/webapp/src/main/elm/Data/Language.elm

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ type Language
1212
| English
1313
| French
1414
| Italian
15+
| Spanish
1516

1617

1718
fromString : String -> Maybe Language
@@ -27,6 +28,10 @@ fromString str =
2728

2829
else if str == "ita" || str == "it" || str == "italian" then
2930
Just Italian
31+
32+
else if str == "spa" || str == "es" || str == "spanish" then
33+
Just Spanish
34+
3035
else
3136
Nothing
3237

@@ -46,6 +51,9 @@ toIso3 lang =
4651
Italian ->
4752
"ita"
4853

54+
Spanish ->
55+
"spa"
56+
4957

5058
toName : Language -> String
5159
toName lang =
@@ -62,7 +70,10 @@ toName lang =
6270
Italian ->
6371
"Italian"
6472

73+
Spanish ->
74+
"Spanish"
75+
6576

6677
all : List Language
6778
all =
68-
[ German, English, French, Italian ]
79+
[ German, English, French, Italian, Spanish ]

0 commit comments

Comments
 (0)