Skip to content

Commit 6f056ef

Browse files
committed
Updated tagger.yaml
1 parent 4512743 commit 6f056ef

13 files changed

Lines changed: 368 additions & 254 deletions

server/data/taggers/flair.yaml

Lines changed: 0 additions & 23 deletions
This file was deleted.

server/data/taggers/hug-tdn-1400-1600.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
id: hug-tdn-1400-1600
2-
uri: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/
2+
uri: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_1400-1600
33
version: 1.0.1
44
description: INT-Hug trained on DBNL excerpts, GTB dictionary quotations & CLVN (1400-1600)
55
language: Dutch
@@ -31,4 +31,4 @@ attributions:
3131
- name: Trained by
3232
details: Instituut voor de Nederlandse Taal
3333
href: https://ivdnt.org/
34-
devport: 8110
34+
port: 8110
Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,34 @@
11
id: hug-tdn-1600-1900
2-
description: "INT-Hug trained on DBNL excerpts, GTB dictionary quotations, newspapers & letters as loot (1600-1900)"
3-
tagset: "TDN-Core"
4-
language: "Dutch"
5-
eraFrom: "1600"
6-
eraTo: "1900"
2+
uri: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_1600-1900
3+
version: 1.0.1
4+
description: INT-Hug trained on DBNL excerpts, GTB dictionary quotations, newspapers & letters as loot (1600-1900)
5+
language: Dutch
6+
period:
7+
from: 1600
8+
to: 1900
79
annotations:
8-
- token
9-
- lemma
10-
- pos
11-
model:
12-
name: hug-tdn-1600-1900
13-
href: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_1600-1900
14-
software:
15-
name: int-huggingface
16-
href: https://github.com/instituutnederlandsetaal/int-huggingface-tagger/
17-
dataset:
18-
name: 1600-1900
19-
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.0.1/combinations/1600-1900.combination.json
20-
trainedBy: "INT"
21-
date: "2024-06-12"
22-
devport: 8111
23-
version: "1.0.0"
10+
- annotation: token
11+
- annotation: lemma
12+
principles:
13+
- name: Lemmatisation principles for GiGaNT
14+
details: Lemmatiseerprincipes voor GiGaNT, het centrale lexicon van het INT
15+
href: https://ivdnt.org/wp-content/uploads/2024/11/lemmatiseerprincipesV2_combi.pdf
16+
- annotation: pos
17+
principles:
18+
- name: TDN-Core
19+
details: Tagset voor Diachroon corpusmateriaal van het Nederlands
20+
href: https://ivdnt.org/wp-content/uploads/2024/11/TDNV2_combi.pdf
21+
attributions:
22+
- name: Dataset
23+
details: Galahad Corpus Data (1600-1900)
24+
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.0.1/combinations/1600-1900.combination.json
25+
- name: Software
26+
details: INT Huggingface Tagger
27+
href: https://github.com/instituutnederlandsetaal/int-huggingface-tagger/
28+
- name: Model
29+
details: hug-tdn-1600-1900
30+
href: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_1600-1900
31+
- name: Trained by
32+
details: Instituut voor de Nederlandse Taal
33+
href: https://ivdnt.org/
34+
port: 8111
Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,35 @@
11
id: hug-tdn-all-enhanced
2-
description: "INT-Hug trained on all TDN-Core material with enhancements for separable verbs"
3-
tagset: "TDN-Core"
4-
language: "Dutch"
5-
eraFrom: "1400"
6-
eraTo: "1900"
2+
uri: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_all_enhanced
3+
version: 1.0.1
4+
description: INT-Hug trained on all TDN-Core material with enhancements for separable verbs
5+
language: Dutch
6+
period:
7+
from: 1400
8+
to: 1900
79
annotations:
8-
- token
9-
- lemma
10-
- pos
11-
model:
12-
name: hug-tdn-all-enhanced
13-
href: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_all_enhanced
14-
software:
15-
name: int-huggingface
16-
href: https://github.com/instituutnederlandsetaal/int-huggingface-tagger/
17-
dataset:
18-
name: ALL
19-
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.0.1/combinations/ALL.combination.json
20-
trainedBy: "INT"
21-
date: "2024-06-12"
22-
devport: 8117
23-
version: "1.0.0"
10+
- annotation: token
11+
- annotation: lemma
12+
principles:
13+
- name: Lemmatisation principles for GiGaNT
14+
details: Lemmatiseerprincipes voor GiGaNT, het centrale lexicon van het INT
15+
href: https://ivdnt.org/wp-content/uploads/2024/11/lemmatiseerprincipesV2_combi.pdf
16+
- annotation: pos
17+
principles:
18+
- name: TDN-Core
19+
details: Tagset voor Diachroon corpusmateriaal van het Nederlands
20+
href: https://ivdnt.org/wp-content/uploads/2024/11/TDNV2_combi.pdf
21+
attributions:
22+
- name: Dataset
23+
details: Galahad Corpus Data (all)
24+
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.0.1/combinations/ALL.combination.json
25+
- name: Software
26+
details: INT Huggingface Tagger
27+
href: https://github.com/instituutnederlandsetaal/int-huggingface-tagger/
28+
- name: Model
29+
details: hug-tdn-all-enhanced
30+
href: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_all_enhanced
31+
- name: Trained by
32+
details: Instituut voor de Nederlandse Taal
33+
href: https://ivdnt.org/
34+
port: 8117
35+
Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,35 @@
11
id: hug-tdn-all
2-
description: "INT-Hug trained on all TDN-Core material"
3-
tagset: "TDN-Core"
4-
language: "Dutch"
5-
eraFrom: "1400"
6-
eraTo: "1900"
2+
uri: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_ALL
3+
version: 1.0.1
4+
description: INT-Hug trained on all TDN-Core material
5+
language: Dutch
6+
period:
7+
from: 1400
8+
to: 1900
79
annotations:
8-
- token
9-
- lemma
10-
- pos
11-
model:
12-
name: hug-tdn-all
13-
href: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_ALL
14-
software:
15-
name: int-huggingface
16-
href: https://github.com/instituutnederlandsetaal/int-huggingface-tagger/
17-
dataset:
18-
name: ALL
19-
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.0.1/combinations/ALL.combination.json
20-
trainedBy: "INT"
21-
date: "2024-06-12"
22-
devport: 8112
23-
version: "1.0.0"
10+
- annotation: token
11+
- annotation: lemma
12+
principles:
13+
- name: Lemmatisation principles for GiGaNT
14+
details: Lemmatiseerprincipes voor GiGaNT, het centrale lexicon van het INT
15+
href: https://ivdnt.org/wp-content/uploads/2024/11/lemmatiseerprincipesV2_combi.pdf
16+
- annotation: pos
17+
principles:
18+
- name: TDN-Core
19+
details: Tagset voor Diachroon corpusmateriaal van het Nederlands
20+
href: https://ivdnt.org/wp-content/uploads/2024/11/TDNV2_combi.pdf
21+
attributions:
22+
- name: Dataset
23+
details: Galahad Corpus Data (all)
24+
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.0.1/combinations/ALL.combination.json
25+
- name: Software
26+
details: INT Huggingface Tagger
27+
href: https://github.com/instituutnederlandsetaal/int-huggingface-tagger/
28+
- name: Model
29+
details: hug-tdn-all
30+
href: https://github.com/instituutnederlandsetaal/galahad-huggingface-models/tree/master/models/galahad/tagger/pos_model_tdn_ALL
31+
- name: Trained by
32+
details: Instituut voor de Nederlandse Taal
33+
href: https://ivdnt.org/
34+
port: 8112
35+
Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,35 @@
11
id: pie-tdn-1200-1600
2-
description: "PIE trained on DBNL excerpts, GTB dictionary quotations, CLVN & Gysseling (1200-1600)"
3-
tagset: "TDN-Core"
4-
language: "Dutch"
5-
eraFrom: "1200"
6-
eraTo: "1600"
2+
uri: https://github.com/instituutnederlandsetaal/galahad-taggers/tree/2.0.0/pie/TDN-1200-1600
3+
version: 1.1.0
4+
description: PIE trained on DBNL excerpts, GTB dictionary quotations, CLVN & Gysseling (1200-1600)
5+
language: Dutch
6+
period:
7+
from: 1200
8+
to: 1600
79
annotations:
8-
- token
9-
- lemma
10-
- pos
11-
model:
12-
name: pie-tdn-1200-1600
13-
href: https://github.com/instituutnederlandsetaal/galahad-taggers-dockerized/tree/1.1.0/pie/TDN-1200-1600
14-
software:
15-
name: int-pie
16-
href: https://github.com/instituutnederlandsetaal/int-pie/tree/1.1.0
17-
dataset:
18-
name: 1200-1600
19-
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.1.0/combinations/1200-1600.combination.json
20-
trainedBy: "INT"
21-
date: "2025-04-07"
22-
devport: 8100
23-
version: "1.1.0"
10+
- annotation: token
11+
- annotation: lemma
12+
principles:
13+
- name: Lemmatisation principles for GiGaNT
14+
details: Lemmatiseerprincipes voor GiGaNT, het centrale lexicon van het INT
15+
href: https://ivdnt.org/wp-content/uploads/2024/11/lemmatiseerprincipesV2_combi.pdf
16+
- annotation: pos
17+
principles:
18+
- name: TDN-Core
19+
details: Tagset voor Diachroon corpusmateriaal van het Nederlands
20+
href: https://ivdnt.org/wp-content/uploads/2024/11/TDNV2_combi.pdf
21+
attributions:
22+
- name: Dataset
23+
details: Galahad Corpus Data (1200-1600)
24+
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.1.0/combinations/1200-1600.combination.json
25+
- name: Software
26+
details: INT PIE
27+
href: https://github.com/instituutnederlandsetaal/int-pie/tree/1.1.0
28+
- name: Model
29+
details: pie-tdn-1200-1600
30+
href: https://github.com/instituutnederlandsetaal/galahad-taggers/tree/2.0.0/pie/TDN-1200-1600
31+
- name: Trained by
32+
details: Instituut voor de Nederlandse Taal
33+
href: https://ivdnt.org/
34+
port: 8100
35+
Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,34 @@
11
id: pie-tdn-1600-1900
2-
description: "PIE trained on DBNL excerpts, GTB dictionary quotations, newspapers & letters as loot (1600-1900)"
3-
tagset: "TDN-Core"
4-
language: "Dutch"
5-
eraFrom: "1600"
6-
eraTo: "1900"
2+
uri: https://github.com/instituutnederlandsetaal/galahad-taggers/tree/2.0.0/pie/TDN-1600-1900
3+
version: 1.1.0
4+
description: PIE trained on DBNL excerpts, GTB dictionary quotations, newspapers & letters as loot (1600-1900)
5+
language: Dutch
6+
period:
7+
from: 1600
8+
to: 1900
79
annotations:
8-
- token
9-
- lemma
10-
- pos
11-
model:
12-
name: pie-tdn-1600-1900
13-
href: https://github.com/instituutnederlandsetaal/galahad-taggers-dockerized/tree/1.1.0/pie/TDN-1600-1900
14-
software:
15-
name: int-pie
16-
href: https://github.com/instituutnederlandsetaal/int-pie/tree/1.1.0
17-
dataset:
18-
name: 1600-1900
19-
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.1.0/combinations/1600-1900.combination.json
20-
trainedBy: "INT"
21-
date: "2025-04-07"
22-
devport: 8101
23-
version: "1.1.0"
10+
- annotation: token
11+
- annotation: lemma
12+
principles:
13+
- name: Lemmatisation principles for GiGaNT
14+
details: Lemmatiseerprincipes voor GiGaNT, het centrale lexicon van het INT
15+
href: https://ivdnt.org/wp-content/uploads/2024/11/lemmatiseerprincipesV2_combi.pdf
16+
- annotation: pos
17+
principles:
18+
- name: TDN-Core
19+
details: Tagset voor Diachroon corpusmateriaal van het Nederlands
20+
href: https://ivdnt.org/wp-content/uploads/2024/11/TDNV2_combi.pdf
21+
attributions:
22+
- name: Dataset
23+
details: Galahad Corpus Data (1600-1900)
24+
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.1.0/combinations/1600-1900.combination.json
25+
- name: Software
26+
details: INT PIE
27+
href: https://github.com/instituutnederlandsetaal/int-pie/tree/1.1.0
28+
- name: Model
29+
details: pie-tdn-1600-1900
30+
href: https://github.com/instituutnederlandsetaal/galahad-taggers/tree/2.0.0/pie/TDN-1600-1900
31+
- name: Trained by
32+
details: Instituut voor de Nederlandse Taal
33+
href: https://ivdnt.org/
34+
port: 8101
Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,34 @@
11
id: pie-tdn-all
2-
description: "PIE trained on all TDN-Core material"
3-
tagset: "TDN-Core"
4-
language: "Dutch"
5-
eraFrom: "1200"
6-
eraTo: "1900"
2+
uri: https://github.com/instituutnederlandsetaal/galahad-taggers/tree/2.0.0/pie/TDN-ALL
3+
version: 1.1.0
4+
description: PIE trained on all TDN-Core material
5+
language: Dutch
6+
period:
7+
from: 1200
8+
to: 1900
79
annotations:
8-
- token
9-
- lemma
10-
- pos
11-
model:
12-
name: pie-tdn-all
13-
href: https://github.com/instituutnederlandsetaal/galahad-taggers-dockerized/tree/1.1.0/pie/TDN-ALL
14-
software:
15-
name: int-pie
16-
href: https://github.com/instituutnederlandsetaal/int-pie/tree/1.1.0
17-
dataset:
18-
name: ALL
19-
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.1.0/combinations/ALL.combination.json
20-
trainedBy: "INT"
21-
date: "2025-04-07"
22-
devport: 8102
23-
version: "1.1.0"
10+
- annotation: token
11+
- annotation: lemma
12+
principles:
13+
- name: Lemmatisation principles for GiGaNT
14+
details: Lemmatiseerprincipes voor GiGaNT, het centrale lexicon van het INT
15+
href: https://ivdnt.org/wp-content/uploads/2024/11/lemmatiseerprincipesV2_combi.pdf
16+
- annotation: pos
17+
principles:
18+
- name: TDN-Core
19+
details: Tagset voor Diachroon corpusmateriaal van het Nederlands
20+
href: https://ivdnt.org/wp-content/uploads/2024/11/TDNV2_combi.pdf
21+
attributions:
22+
- name: Dataset
23+
details: Galahad Corpus Data (all)
24+
href: https://github.com/instituutnederlandsetaal/galahad-corpus-data/tree/1.1.0/combinations/ALL.combination.json
25+
- name: Software
26+
details: INT PIE
27+
href: https://github.com/instituutnederlandsetaal/int-pie/tree/1.1.0
28+
- name: Model
29+
details: pie-tdn-all
30+
href: https://github.com/instituutnederlandsetaal/galahad-taggers/tree/2.0.0/pie/TDN-ALL
31+
- name: Trained by
32+
details: Instituut voor de Nederlandse Taal
33+
href: https://ivdnt.org/
34+
port: 8102

0 commit comments

Comments
 (0)