niobures committed
Commit ad3b597 · verified · 1 Parent(s): fe73573

CoreNLP, UDPipe

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +104 -0
  2. CoreNLP/models/ru/parser/nndep.rus.model81_mf.txt.gz +3 -0
  3. CoreNLP/models/ru/parser/nndep.rus.model90.9_88.6.txt.gz +3 -0
  4. CoreNLP/models/ru/parser/nndep.rus.modelAr100HS400.txt.gz +3 -0
  5. CoreNLP/models/ru/parser/nndep.rus.modelMFAr100HS400_81.txt.gz +3 -0
  6. CoreNLP/models/ru/parser/nndep.rus.modelMFWiki100HS400_80.txt.gz +3 -0
  7. CoreNLP/models/ru/tagger/dict.tsv +3 -0
  8. CoreNLP/models/ru/tagger/russian-ud-mf.tagger +3 -0
  9. CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger +3 -0
  10. CoreNLP/models/ru/tagger/russian-ud-pos.tagger +3 -0
  11. CoreNLP/stanford-postagger-full-2020-11-17/LICENSE.txt +339 -0
  12. CoreNLP/stanford-postagger-full-2020-11-17/README.txt +315 -0
  13. CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo.java +32 -0
  14. CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo2.java +62 -0
  15. CoreNLP/stanford-postagger-full-2020-11-17/build.xml +201 -0
  16. CoreNLP/stanford-postagger-full-2020-11-17/data/enclitic-inflections.data +0 -0
  17. CoreNLP/stanford-postagger-full-2020-11-17/models/README-Models.txt +68 -0
  18. CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger +3 -0
  19. CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger.props +35 -0
  20. CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger +3 -0
  21. CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger.props +35 -0
  22. CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger +3 -0
  23. CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger.props +35 -0
  24. CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger +3 -0
  25. CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger.props +35 -0
  26. CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger +3 -0
  27. CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger.props +35 -0
  28. CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger +3 -0
  29. CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger.props +36 -0
  30. CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger +3 -0
  31. CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger.props +35 -0
  32. CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger +3 -0
  33. CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger.props +35 -0
  34. CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger +3 -0
  35. CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger.props +35 -0
  36. CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger +3 -0
  37. CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger.props +35 -0
  38. CoreNLP/stanford-postagger-full-2020-11-17/sample-input.txt +6 -0
  39. CoreNLP/stanford-postagger-full-2020-11-17/sample-output.txt +3 -0
  40. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar +3 -0
  41. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar +3 -0
  42. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar +3 -0
  43. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.bat +3 -0
  44. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.sh +2 -0
  45. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.bat +4 -0
  46. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar +3 -0
  47. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.sh +6 -0
  48. UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll +0 -0
  49. UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll.config +83 -0
  50. UDPipe/v1/XDependencies/UDPipe/AUTHORS +2 -0
.gitattributes CHANGED
@@ -33,3 +33,107 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ CoreNLP/models/ru/tagger/dict.tsv filter=lfs diff=lfs merge=lfs -text
37
+ CoreNLP/models/ru/tagger/russian-ud-mf.tagger filter=lfs diff=lfs merge=lfs -text
38
+ CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger filter=lfs diff=lfs merge=lfs -text
39
+ CoreNLP/models/ru/tagger/russian-ud-pos.tagger filter=lfs diff=lfs merge=lfs -text
40
+ CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger filter=lfs diff=lfs merge=lfs -text
41
+ CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger filter=lfs diff=lfs merge=lfs -text
42
+ CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger filter=lfs diff=lfs merge=lfs -text
43
+ CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger filter=lfs diff=lfs merge=lfs -text
44
+ CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger filter=lfs diff=lfs merge=lfs -text
45
+ CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
46
+ CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
47
+ CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger filter=lfs diff=lfs merge=lfs -text
48
+ CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger filter=lfs diff=lfs merge=lfs -text
49
+ CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger filter=lfs diff=lfs merge=lfs -text
50
+ CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar filter=lfs diff=lfs merge=lfs -text
51
+ CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar filter=lfs diff=lfs merge=lfs -text
52
+ CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar filter=lfs diff=lfs merge=lfs -text
53
+ CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar filter=lfs diff=lfs merge=lfs -text
54
+ UDPipe/v1/binary/linux32/csharp/libudpipe_csharp.so filter=lfs diff=lfs merge=lfs -text
55
+ UDPipe/v1/binary/linux32/java/libudpipe_java.so filter=lfs diff=lfs merge=lfs -text
56
+ UDPipe/v1/binary/linux32/udpipe filter=lfs diff=lfs merge=lfs -text
57
+ UDPipe/v1/binary/linux64/csharp/libudpipe_csharp.so filter=lfs diff=lfs merge=lfs -text
58
+ UDPipe/v1/binary/linux64/java/libudpipe_java.so filter=lfs diff=lfs merge=lfs -text
59
+ UDPipe/v1/binary/linux64/udpipe filter=lfs diff=lfs merge=lfs -text
60
+ UDPipe/v1/binary/linuxArmv7/libudpipe.a filter=lfs diff=lfs merge=lfs -text
61
+ UDPipe/v1/binary/linuxArmv7/rest_server/udpipe_server filter=lfs diff=lfs merge=lfs -text
62
+ UDPipe/v1/binary/linuxArmv7/udpipe filter=lfs diff=lfs merge=lfs -text
63
+ UDPipe/v1/binary/osx/csharp/libudpipe_csharp.dylib filter=lfs diff=lfs merge=lfs -text
64
+ UDPipe/v1/binary/osx/java/libudpipe_java.dylib filter=lfs diff=lfs merge=lfs -text
65
+ UDPipe/v1/binary/osx/udpipe filter=lfs diff=lfs merge=lfs -text
66
+ UDPipe/v1/binary/win64/csharp/udpipe_csharp.dll filter=lfs diff=lfs merge=lfs -text
67
+ UDPipe/v1/binary/win64/java/udpipe_java.dll filter=lfs diff=lfs merge=lfs -text
68
+ UDPipe/v1/binary/win64/udpipe.exe filter=lfs diff=lfs merge=lfs -text
69
+ UDPipe/v1/udpipe_csharp/x64/udpipe_csharp.dll filter=lfs diff=lfs merge=lfs -text
70
+ UDPipe/v1/XDependencies/UDPipe/MANUAL.pdf filter=lfs diff=lfs merge=lfs -text
71
+ UDPipe/v1/XDependencies/UDPipe/Models/Ancient_greek-PROIEL.udpipe filter=lfs diff=lfs merge=lfs -text
72
+ UDPipe/v1/XDependencies/UDPipe/Models/Ancient_greek.udpipe filter=lfs diff=lfs merge=lfs -text
73
+ UDPipe/v1/XDependencies/UDPipe/Models/Arabic.udpipe filter=lfs diff=lfs merge=lfs -text
74
+ UDPipe/v1/XDependencies/UDPipe/Models/Basque.udpipe filter=lfs diff=lfs merge=lfs -text
75
+ UDPipe/v1/XDependencies/UDPipe/Models/Belarusian.udpipe filter=lfs diff=lfs merge=lfs -text
76
+ UDPipe/v1/XDependencies/UDPipe/Models/Bulgarian.udpipe filter=lfs diff=lfs merge=lfs -text
77
+ UDPipe/v1/XDependencies/UDPipe/Models/Catalan.udpipe filter=lfs diff=lfs merge=lfs -text
78
+ UDPipe/v1/XDependencies/UDPipe/Models/Chinese.udpipe filter=lfs diff=lfs merge=lfs -text
79
+ UDPipe/v1/XDependencies/UDPipe/Models/Coptic.udpipe filter=lfs diff=lfs merge=lfs -text
80
+ UDPipe/v1/XDependencies/UDPipe/Models/Croatian.udpipe filter=lfs diff=lfs merge=lfs -text
81
+ UDPipe/v1/XDependencies/UDPipe/Models/Czech-CAC.udpipe filter=lfs diff=lfs merge=lfs -text
82
+ UDPipe/v1/XDependencies/UDPipe/Models/Czech-CLTT.udpipe filter=lfs diff=lfs merge=lfs -text
83
+ UDPipe/v1/XDependencies/UDPipe/Models/Czech.udpipe filter=lfs diff=lfs merge=lfs -text
84
+ UDPipe/v1/XDependencies/UDPipe/Models/Danish.udpipe filter=lfs diff=lfs merge=lfs -text
85
+ UDPipe/v1/XDependencies/UDPipe/Models/Dutch-LASSYSMALL.udpipe filter=lfs diff=lfs merge=lfs -text
86
+ UDPipe/v1/XDependencies/UDPipe/Models/Dutch.udpipe filter=lfs diff=lfs merge=lfs -text
87
+ UDPipe/v1/XDependencies/UDPipe/Models/English-LINES.udpipe filter=lfs diff=lfs merge=lfs -text
88
+ UDPipe/v1/XDependencies/UDPipe/Models/English-PARTUT.udpipe filter=lfs diff=lfs merge=lfs -text
89
+ UDPipe/v1/XDependencies/UDPipe/Models/English.udpipe filter=lfs diff=lfs merge=lfs -text
90
+ UDPipe/v1/XDependencies/UDPipe/Models/Estonian.udpipe filter=lfs diff=lfs merge=lfs -text
91
+ UDPipe/v1/XDependencies/UDPipe/Models/Finnish-FTB.udpipe filter=lfs diff=lfs merge=lfs -text
92
+ UDPipe/v1/XDependencies/UDPipe/Models/Finnish.udpipe filter=lfs diff=lfs merge=lfs -text
93
+ UDPipe/v1/XDependencies/UDPipe/Models/French-PARTUT.udpipe filter=lfs diff=lfs merge=lfs -text
94
+ UDPipe/v1/XDependencies/UDPipe/Models/French-SEQUOIA.udpipe filter=lfs diff=lfs merge=lfs -text
95
+ UDPipe/v1/XDependencies/UDPipe/Models/French.udpipe filter=lfs diff=lfs merge=lfs -text
96
+ UDPipe/v1/XDependencies/UDPipe/Models/Galician-TREEGAL.udpipe filter=lfs diff=lfs merge=lfs -text
97
+ UDPipe/v1/XDependencies/UDPipe/Models/Galician.udpipe filter=lfs diff=lfs merge=lfs -text
98
+ UDPipe/v1/XDependencies/UDPipe/Models/German.udpipe filter=lfs diff=lfs merge=lfs -text
99
+ UDPipe/v1/XDependencies/UDPipe/Models/Gothic.udpipe filter=lfs diff=lfs merge=lfs -text
100
+ UDPipe/v1/XDependencies/UDPipe/Models/Greek.udpipe filter=lfs diff=lfs merge=lfs -text
101
+ UDPipe/v1/XDependencies/UDPipe/Models/Hebrew.udpipe filter=lfs diff=lfs merge=lfs -text
102
+ UDPipe/v1/XDependencies/UDPipe/Models/Hindi.udpipe filter=lfs diff=lfs merge=lfs -text
103
+ UDPipe/v1/XDependencies/UDPipe/Models/Hungarian.udpipe filter=lfs diff=lfs merge=lfs -text
104
+ UDPipe/v1/XDependencies/UDPipe/Models/Indonesian.udpipe filter=lfs diff=lfs merge=lfs -text
105
+ UDPipe/v1/XDependencies/UDPipe/Models/Irish.udpipe filter=lfs diff=lfs merge=lfs -text
106
+ UDPipe/v1/XDependencies/UDPipe/Models/Italian.udpipe filter=lfs diff=lfs merge=lfs -text
107
+ UDPipe/v1/XDependencies/UDPipe/Models/Japanese.udpipe filter=lfs diff=lfs merge=lfs -text
108
+ UDPipe/v1/XDependencies/UDPipe/Models/Kazakh.udpipe filter=lfs diff=lfs merge=lfs -text
109
+ UDPipe/v1/XDependencies/UDPipe/Models/Korean.udpipe filter=lfs diff=lfs merge=lfs -text
110
+ UDPipe/v1/XDependencies/UDPipe/Models/Latin-ITTB.udpipe filter=lfs diff=lfs merge=lfs -text
111
+ UDPipe/v1/XDependencies/UDPipe/Models/Latin-PROIEL.udpipe filter=lfs diff=lfs merge=lfs -text
112
+ UDPipe/v1/XDependencies/UDPipe/Models/Latin.udpipe filter=lfs diff=lfs merge=lfs -text
113
+ UDPipe/v1/XDependencies/UDPipe/Models/Latvian.udpipe filter=lfs diff=lfs merge=lfs -text
114
+ UDPipe/v1/XDependencies/UDPipe/Models/Lithuanian.udpipe filter=lfs diff=lfs merge=lfs -text
115
+ UDPipe/v1/XDependencies/UDPipe/Models/Norwegian-BOKMAAL.udpipe filter=lfs diff=lfs merge=lfs -text
116
+ UDPipe/v1/XDependencies/UDPipe/Models/Norwegian-NYNORSK.udpipe filter=lfs diff=lfs merge=lfs -text
117
+ UDPipe/v1/XDependencies/UDPipe/Models/Old_church_slavonic.udpipe filter=lfs diff=lfs merge=lfs -text
118
+ UDPipe/v1/XDependencies/UDPipe/Models/Persian.udpipe filter=lfs diff=lfs merge=lfs -text
119
+ UDPipe/v1/XDependencies/UDPipe/Models/Polish.udpipe filter=lfs diff=lfs merge=lfs -text
120
+ UDPipe/v1/XDependencies/UDPipe/Models/Portuguese-BR.udpipe filter=lfs diff=lfs merge=lfs -text
121
+ UDPipe/v1/XDependencies/UDPipe/Models/Portuguese.udpipe filter=lfs diff=lfs merge=lfs -text
122
+ UDPipe/v1/XDependencies/UDPipe/Models/Romanian.udpipe filter=lfs diff=lfs merge=lfs -text
123
+ UDPipe/v1/XDependencies/UDPipe/Models/Russian-SYNTAGRUS.udpipe filter=lfs diff=lfs merge=lfs -text
124
+ UDPipe/v1/XDependencies/UDPipe/Models/Russian.udpipe filter=lfs diff=lfs merge=lfs -text
125
+ UDPipe/v1/XDependencies/UDPipe/Models/Sanskrit.udpipe filter=lfs diff=lfs merge=lfs -text
126
+ UDPipe/v1/XDependencies/UDPipe/Models/Slovak.udpipe filter=lfs diff=lfs merge=lfs -text
127
+ UDPipe/v1/XDependencies/UDPipe/Models/Slovenian-SST.udpipe filter=lfs diff=lfs merge=lfs -text
128
+ UDPipe/v1/XDependencies/UDPipe/Models/Slovenian.udpipe filter=lfs diff=lfs merge=lfs -text
129
+ UDPipe/v1/XDependencies/UDPipe/Models/Spanish-ANCORA.udpipe filter=lfs diff=lfs merge=lfs -text
130
+ UDPipe/v1/XDependencies/UDPipe/Models/Spanish.udpipe filter=lfs diff=lfs merge=lfs -text
131
+ UDPipe/v1/XDependencies/UDPipe/Models/Swedish-LINES.udpipe filter=lfs diff=lfs merge=lfs -text
132
+ UDPipe/v1/XDependencies/UDPipe/Models/Swedish.udpipe filter=lfs diff=lfs merge=lfs -text
133
+ UDPipe/v1/XDependencies/UDPipe/Models/Tamil.udpipe filter=lfs diff=lfs merge=lfs -text
134
+ UDPipe/v1/XDependencies/UDPipe/Models/Turkish.udpipe filter=lfs diff=lfs merge=lfs -text
135
+ UDPipe/v1/XDependencies/UDPipe/Models/Ukrainian.udpipe filter=lfs diff=lfs merge=lfs -text
136
+ UDPipe/v1/XDependencies/UDPipe/Models/Urdu.udpipe filter=lfs diff=lfs merge=lfs -text
137
+ UDPipe/v1/XDependencies/UDPipe/Models/Uyghur.udpipe filter=lfs diff=lfs merge=lfs -text
138
+ UDPipe/v1/XDependencies/UDPipe/Models/Vietnamese.udpipe filter=lfs diff=lfs merge=lfs -text
139
+ UDPipe/v1/XDependencies/UDPipe/udpipe.exe filter=lfs diff=lfs merge=lfs -text
CoreNLP/models/ru/parser/nndep.rus.model81_mf.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:326fc20cdd0602da6ea44279f4d04761670bb03d86297a9be59d8c921bd11580
3
+ size 115029311
CoreNLP/models/ru/parser/nndep.rus.model90.9_88.6.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f8e0cb78116c4fba174b6ab19dccc7867b7cea2dd44ccea7067ee949cb04e69
3
+ size 118576259
CoreNLP/models/ru/parser/nndep.rus.modelAr100HS400.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b09c5d84c44ca668372b78f8c20235fb3fe701df25169dd0182e4830043b29
3
+ size 127444031
CoreNLP/models/ru/parser/nndep.rus.modelMFAr100HS400_81.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e5ea42afc93a9f9a6c7c915451d1257e6455faaa3f01b06bd1643af3bafaff2
3
+ size 118710465
CoreNLP/models/ru/parser/nndep.rus.modelMFWiki100HS400_80.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8afeae32c7675e237f5cef4f225e9ed92d8c66b80e37b06fe710cfcb6f558617
3
+ size 119027293
CoreNLP/models/ru/tagger/dict.tsv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01234d489dc349b61843573e725f2fd3e265e004a1408a6712c5f6fafca5d506
3
+ size 129696832
CoreNLP/models/ru/tagger/russian-ud-mf.tagger ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd10da6d84ec2f8d150d48147dac16d2209b2bd09af0e9740c7fa18abe5b1e8a
3
+ size 27644829
CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c85cdd823a4c7ea7c62af4607b425b6a303f414d992a22c067d6be58f2a231df
3
+ size 22430426
CoreNLP/models/ru/tagger/russian-ud-pos.tagger ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57eb9e303d97d9c115b879deaa0ffa6e8b2519c8713bb040e5bd5827c8ee64de
3
+ size 9018233
CoreNLP/stanford-postagger-full-2020-11-17/LICENSE.txt ADDED
@@ -0,0 +1,339 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Preamble
10
+
11
+ The licenses for most software are designed to take away your
12
+ freedom to share and change it. By contrast, the GNU General Public
13
+ License is intended to guarantee your freedom to share and change free
14
+ software--to make sure the software is free for all its users. This
15
+ General Public License applies to most of the Free Software
16
+ Foundation's software and to any other program whose authors commit to
17
+ using it. (Some other Free Software Foundation software is covered by
18
+ the GNU Lesser General Public License instead.) You can apply it to
19
+ your programs, too.
20
+
21
+ When we speak of free software, we are referring to freedom, not
22
+ price. Our General Public Licenses are designed to make sure that you
23
+ have the freedom to distribute copies of free software (and charge for
24
+ this service if you wish), that you receive source code or can get it
25
+ if you want it, that you can change the software or use pieces of it
26
+ in new free programs; and that you know you can do these things.
27
+
28
+ To protect your rights, we need to make restrictions that forbid
29
+ anyone to deny you these rights or to ask you to surrender the rights.
30
+ These restrictions translate to certain responsibilities for you if you
31
+ distribute copies of the software, or if you modify it.
32
+
33
+ For example, if you distribute copies of such a program, whether
34
+ gratis or for a fee, you must give the recipients all the rights that
35
+ you have. You must make sure that they, too, receive or can get the
36
+ source code. And you must show them these terms so they know their
37
+ rights.
38
+
39
+ We protect your rights with two steps: (1) copyright the software, and
40
+ (2) offer you this license which gives you legal permission to copy,
41
+ distribute and/or modify the software.
42
+
43
+ Also, for each author's protection and ours, we want to make certain
44
+ that everyone understands that there is no warranty for this free
45
+ software. If the software is modified by someone else and passed on, we
46
+ want its recipients to know that what they have is not the original, so
47
+ that any problems introduced by others will not reflect on the original
48
+ authors' reputations.
49
+
50
+ Finally, any free program is threatened constantly by software
51
+ patents. We wish to avoid the danger that redistributors of a free
52
+ program will individually obtain patent licenses, in effect making the
53
+ program proprietary. To prevent this, we have made it clear that any
54
+ patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ GNU GENERAL PUBLIC LICENSE
60
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+ 0. This License applies to any program or other work which contains
63
+ a notice placed by the copyright holder saying it may be distributed
64
+ under the terms of this General Public License. The "Program", below,
65
+ refers to any such program or work, and a "work based on the Program"
66
+ means either the Program or any derivative work under copyright law:
67
+ that is to say, a work containing the Program or a portion of it,
68
+ either verbatim or with modifications and/or translated into another
69
+ language. (Hereinafter, translation is included without limitation in
70
+ the term "modification".) Each licensee is addressed as "you".
71
+
72
+ Activities other than copying, distribution and modification are not
73
+ covered by this License; they are outside its scope. The act of
74
+ running the Program is not restricted, and the output from the Program
75
+ is covered only if its contents constitute a work based on the
76
+ Program (independent of having been made by running the Program).
77
+ Whether that is true depends on what the Program does.
78
+
79
+ 1. You may copy and distribute verbatim copies of the Program's
80
+ source code as you receive it, in any medium, provided that you
81
+ conspicuously and appropriately publish on each copy an appropriate
82
+ copyright notice and disclaimer of warranty; keep intact all the
83
+ notices that refer to this License and to the absence of any warranty;
84
+ and give any other recipients of the Program a copy of this License
85
+ along with the Program.
86
+
87
+ You may charge a fee for the physical act of transferring a copy, and
88
+ you may at your option offer warranty protection in exchange for a fee.
89
+
90
+ 2. You may modify your copy or copies of the Program or any portion
91
+ of it, thus forming a work based on the Program, and copy and
92
+ distribute such modifications or work under the terms of Section 1
93
+ above, provided that you also meet all of these conditions:
94
+
95
+ a) You must cause the modified files to carry prominent notices
96
+ stating that you changed the files and the date of any change.
97
+
98
+ b) You must cause any work that you distribute or publish, that in
99
+ whole or in part contains or is derived from the Program or any
100
+ part thereof, to be licensed as a whole at no charge to all third
101
+ parties under the terms of this License.
102
+
103
+ c) If the modified program normally reads commands interactively
104
+ when run, you must cause it, when started running for such
105
+ interactive use in the most ordinary way, to print or display an
106
+ announcement including an appropriate copyright notice and a
107
+ notice that there is no warranty (or else, saying that you provide
108
+ a warranty) and that users may redistribute the program under
109
+ these conditions, and telling the user how to view a copy of this
110
+ License. (Exception: if the Program itself is interactive but
111
+ does not normally print such an announcement, your work based on
112
+ the Program is not required to print an announcement.)
113
+
114
+ These requirements apply to the modified work as a whole. If
115
+ identifiable sections of that work are not derived from the Program,
116
+ and can be reasonably considered independent and separate works in
117
+ themselves, then this License, and its terms, do not apply to those
118
+ sections when you distribute them as separate works. But when you
119
+ distribute the same sections as part of a whole which is a work based
120
+ on the Program, the distribution of the whole must be on the terms of
121
+ this License, whose permissions for other licensees extend to the
122
+ entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+ Thus, it is not the intent of this section to claim rights or contest
125
+ your rights to work written entirely by you; rather, the intent is to
126
+ exercise the right to control the distribution of derivative or
127
+ collective works based on the Program.
128
+
129
+ In addition, mere aggregation of another work not based on the Program
130
+ with the Program (or with a work based on the Program) on a volume of
131
+ a storage or distribution medium does not bring the other work under
132
+ the scope of this License.
133
+
134
+ 3. You may copy and distribute the Program (or a work based on it,
135
+ under Section 2) in object code or executable form under the terms of
136
+ Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+ a) Accompany it with the complete corresponding machine-readable
139
+ source code, which must be distributed under the terms of Sections
140
+ 1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+ b) Accompany it with a written offer, valid for at least three
143
+ years, to give any third party, for a charge no more than your
144
+ cost of physically performing source distribution, a complete
145
+ machine-readable copy of the corresponding source code, to be
146
+ distributed under the terms of Sections 1 and 2 above on a medium
147
+ customarily used for software interchange; or,
148
+
149
+ c) Accompany it with the information you received as to the offer
150
+ to distribute corresponding source code. (This alternative is
151
+ allowed only for noncommercial distribution and only if you
152
+ received the program in object code or executable form with such
153
+ an offer, in accord with Subsection b above.)
154
+
155
+ The source code for a work means the preferred form of the work for
156
+ making modifications to it. For an executable work, complete source
157
+ code means all the source code for all modules it contains, plus any
158
+ associated interface definition files, plus the scripts used to
159
+ control compilation and installation of the executable. However, as a
160
+ special exception, the source code distributed need not include
161
+ anything that is normally distributed (in either source or binary
162
+ form) with the major components (compiler, kernel, and so on) of the
163
+ operating system on which the executable runs, unless that component
164
+ itself accompanies the executable.
165
+
166
+ If distribution of executable or object code is made by offering
167
+ access to copy from a designated place, then offering equivalent
168
+ access to copy the source code from the same place counts as
169
+ distribution of the source code, even though third parties are not
170
+ compelled to copy the source along with the object code.
171
+
172
+ 4. You may not copy, modify, sublicense, or distribute the Program
173
+ except as expressly provided under this License. Any attempt
174
+ otherwise to copy, modify, sublicense or distribute the Program is
175
+ void, and will automatically terminate your rights under this License.
176
+ However, parties who have received copies, or rights, from you under
177
+ this License will not have their licenses terminated so long as such
178
+ parties remain in full compliance.
179
+
180
+ 5. You are not required to accept this License, since you have not
181
+ signed it. However, nothing else grants you permission to modify or
182
+ distribute the Program or its derivative works. These actions are
183
+ prohibited by law if you do not accept this License. Therefore, by
184
+ modifying or distributing the Program (or any work based on the
185
+ Program), you indicate your acceptance of this License to do so, and
186
+ all its terms and conditions for copying, distributing or modifying
187
+ the Program or works based on it.
188
+
189
+ 6. Each time you redistribute the Program (or any work based on the
190
+ Program), the recipient automatically receives a license from the
191
+ original licensor to copy, distribute or modify the Program subject to
192
+ these terms and conditions. You may not impose any further
193
+ restrictions on the recipients' exercise of the rights granted herein.
194
+ You are not responsible for enforcing compliance by third parties to
195
+ this License.
196
+
197
+ 7. If, as a consequence of a court judgment or allegation of patent
198
+ infringement or for any other reason (not limited to patent issues),
199
+ conditions are imposed on you (whether by court order, agreement or
200
+ otherwise) that contradict the conditions of this License, they do not
201
+ excuse you from the conditions of this License. If you cannot
202
+ distribute so as to satisfy simultaneously your obligations under this
203
+ License and any other pertinent obligations, then as a consequence you
204
+ may not distribute the Program at all. For example, if a patent
205
+ license would not permit royalty-free redistribution of the Program by
206
+ all those who receive copies directly or indirectly through you, then
207
+ the only way you could satisfy both it and this License would be to
208
+ refrain entirely from distribution of the Program.
209
+
210
+ If any portion of this section is held invalid or unenforceable under
211
+ any particular circumstance, the balance of the section is intended to
212
+ apply and the section as a whole is intended to apply in other
213
+ circumstances.
214
+
215
+ It is not the purpose of this section to induce you to infringe any
216
+ patents or other property right claims or to contest validity of any
217
+ such claims; this section has the sole purpose of protecting the
218
+ integrity of the free software distribution system, which is
219
+ implemented by public license practices. Many people have made
220
+ generous contributions to the wide range of software distributed
221
+ through that system in reliance on consistent application of that
222
+ system; it is up to the author/donor to decide if he or she is willing
223
+ to distribute software through any other system and a licensee cannot
224
+ impose that choice.
225
+
226
+ This section is intended to make thoroughly clear what is believed to
227
+ be a consequence of the rest of this License.
228
+
229
+ 8. If the distribution and/or use of the Program is restricted in
230
+ certain countries either by patents or by copyrighted interfaces, the
231
+ original copyright holder who places the Program under this License
232
+ may add an explicit geographical distribution limitation excluding
233
+ those countries, so that distribution is permitted only in or among
234
+ countries not thus excluded. In such case, this License incorporates
235
+ the limitation as if written in the body of this License.
236
+
237
+ 9. The Free Software Foundation may publish revised and/or new versions
238
+ of the General Public License from time to time. Such new versions will
239
+ be similar in spirit to the present version, but may differ in detail to
240
+ address new problems or concerns.
241
+
242
+ Each version is given a distinguishing version number. If the Program
243
+ specifies a version number of this License which applies to it and "any
244
+ later version", you have the option of following the terms and conditions
245
+ either of that version or of any later version published by the Free
246
+ Software Foundation. If the Program does not specify a version number of
247
+ this License, you may choose any version ever published by the Free Software
248
+ Foundation.
249
+
250
+ 10. If you wish to incorporate parts of the Program into other free
251
+ programs whose distribution conditions are different, write to the author
252
+ to ask for permission. For software which is copyrighted by the Free
253
+ Software Foundation, write to the Free Software Foundation; we sometimes
254
+ make exceptions for this. Our decision will be guided by the two goals
255
+ of preserving the free status of all derivatives of our free software and
256
+ of promoting the sharing and reuse of software generally.
257
+
258
+ NO WARRANTY
259
+
260
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+ REPAIR OR CORRECTION.
269
+
270
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+ POSSIBILITY OF SUCH DAMAGES.
279
+
280
+ END OF TERMS AND CONDITIONS
281
+
282
+ How to Apply These Terms to Your New Programs
283
+
284
+ If you develop a new program, and you want it to be of the greatest
285
+ possible use to the public, the best way to achieve this is to make it
286
+ free software which everyone can redistribute and change under these terms.
287
+
288
+ To do so, attach the following notices to the program. It is safest
289
+ to attach them to the start of each source file to most effectively
290
+ convey the exclusion of warranty; and each file should have at least
291
+ the "copyright" line and a pointer to where the full notice is found.
292
+
293
+ <one line to give the program's name and a brief idea of what it does.>
294
+ Copyright (C) <year> <name of author>
295
+
296
+ This program is free software; you can redistribute it and/or modify
297
+ it under the terms of the GNU General Public License as published by
298
+ the Free Software Foundation; either version 2 of the License, or
299
+ (at your option) any later version.
300
+
301
+ This program is distributed in the hope that it will be useful,
302
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
303
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304
+ GNU General Public License for more details.
305
+
306
+ You should have received a copy of the GNU General Public License along
307
+ with this program; if not, write to the Free Software Foundation, Inc.,
308
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
+
310
+ Also add information on how to contact you by electronic and paper mail.
311
+
312
+ If the program is interactive, make it output a short notice like this
313
+ when it starts in an interactive mode:
314
+
315
+ Gnomovision version 69, Copyright (C) year name of author
316
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
+ This is free software, and you are welcome to redistribute it
318
+ under certain conditions; type `show c' for details.
319
+
320
+ The hypothetical commands `show w' and `show c' should show the appropriate
321
+ parts of the General Public License. Of course, the commands you use may
322
+ be called something other than `show w' and `show c'; they could even be
323
+ mouse-clicks or menu items--whatever suits your program.
324
+
325
+ You should also get your employer (if you work as a programmer) or your
326
+ school, if any, to sign a "copyright disclaimer" for the program, if
327
+ necessary. Here is a sample; alter the names:
328
+
329
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
+
332
+ <signature of Ty Coon>, 1 April 1989
333
+ Ty Coon, President of Vice
334
+
335
+ This General Public License does not permit incorporating your program into
336
+ proprietary programs. If your program is a subroutine library, you may
337
+ consider it more useful to permit linking proprietary applications with the
338
+ library. If this is what you want to do, use the GNU Lesser General
339
+ Public License instead of this License.
CoreNLP/stanford-postagger-full-2020-11-17/README.txt ADDED
@@ -0,0 +1,315 @@
1
+ Stanford POS Tagger, v4.2.0 - 2020-11-17
2
+ Copyright (c) 2002-2020 The Board of Trustees of
3
+ The Leland Stanford Junior University. All Rights Reserved.
4
+
5
+ Original tagger author: Kristina Toutanova
6
+ Code contributions: Christopher Manning, Dan Klein, William Morgan,
7
+ Huihsin Tseng, Anna Rafferty, John Bauer
8
+ Major rewrite for version 2.0 by Michel Galley.
9
+ Current release prepared by: Jason Bolton
10
+
11
+ This package contains a Maximum Entropy part of speech tagger.
12
+
13
+ A Part-Of-Speech Tagger (POS Tagger) is a piece of software that reads
14
+ text in some language and assigns parts of speech to each word (and
15
+ other tokens), such as noun, verb, adjective, etc. Generally
16
+ computational applications use more fine-grained POS tags like
17
+ 'noun-plural'. This software is a Java implementation of the log-linear
18
+ part-of-speech (POS) taggers described in:
19
+
20
+ Kristina Toutanova and Christopher D. Manning. 2000. Enriching the
21
+ Knowledge Sources Used in a Maximum Entropy Part-of-Speech
22
+ Tagger. Proceedings of the Joint SIGDAT Conference on Empirical Methods
23
+ in Natural Language Processing and Very Large Corpora (EMNLP/VLC-2000),
24
+ Hong Kong.
25
+
26
+ Kristina Toutanova, Dan Klein, Christopher Manning, and Yoram
27
+ Singer. 2003. Feature-Rich Part-of-Speech Tagging with a Cyclic
28
+ Dependency Network. In Proceedings of HLT-NAACL 2003 pages 252-259.
29
+
30
+ The system requires Java 1.8+ to be installed. About 60 MB of memory is
31
+ required to run a trained tagger, depending on the OS, tagging model
32
+ chosen, etc. (i.e., you may need to give to java an option like java
33
+ -mx120m). Plenty of memory is needed to train a tagger. It depends on
34
+ the complexity of the model but at least 1GB is recommended (java
35
+ -mx1g). Two trained tagger models for English are included with the
36
+ tagger, along with some caseless versions, and we provide models for
37
+ some other languages. The tagger can be retrained on other languages
38
+ based on POS-annotated training text.
39
+
40
+
41
+
42
+ QUICKSTART
43
+ -----------------------------------------------
44
+
45
+ The Stanford POS Tagger is designed to be used from the command line or
46
+ programmatically via its API.
47
+
48
+ There is a GUI interface, but it is for
49
+ demonstration purposes only; most features of the tagger can only be
50
+ accessed via the command line. To run the demonstration GUI you should
51
+ be able to use any of the following 2 methods:
52
+
53
+ 1)
54
+ java -mx200m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI models/wsj-0-18-left3words-distsim.tagger
55
+
56
+ 2) Running the appropriate script for your operating system:
57
+ stanford-postagger-gui.bat
58
+ ./stanford-postagger-gui.sh
59
+
60
+ To run the tagger from the command line, you can start with the provided
61
+ script appropriate for your operating system:
62
+ ./stanford-postagger.sh models/wsj-0-18-left3words-distsim.tagger sample-input.txt
63
+ stanford-postagger models\wsj-0-18-left3words-distsim.tagger sample-input.txt
64
+ The output should match what is found in sample-output.txt
65
+
66
+ The tagger has three modes: tagging, training, and testing. Tagging
67
+ allows you to use a pretrained model (two English models are included)
68
+ to assign part of speech tags to unlabeled text. Training allows you to
69
+ save a new model based on a set of tagged data that you provide.
70
+ Testing allows you to see how well a tagger performs by tagging labeled
71
+ data and evaluating the results against the correct tags.
72
+
73
+ Many options are available for training, tagging, and testing. These
74
+ options can be set using a properties file. To start, you can generate a
75
+ default properties file by:
76
+
77
+ java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -genprops > myPropsFile.prop
78
+
79
+ This will create the file myPropsFile.prop with descriptions of each
80
+ option for the tagger and the default values for these options
81
+ specified. Any properties you can specify in a properties file can be
82
+ specified on the command line or vice versa. For further information,
83
+ please consult the Javadocs (start with the entry for MaxentTagger,
84
+ which includes a table of all options which may be set to configure the
85
+ tagger and descriptions of those options).
86
+
87
+
88
+ To tag a file using the pre-trained bidirectional model
89
+ =======================================================
90
+
91
+ java -mx300m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -model models/wsj-0-18-bidirectional-distsim.tagger -textFile sample-input.txt > sample-tagged.txt
92
+
93
+ Tagged output will be printed to standard out, which you can redirect
94
+ as above. Note that the bidirectional model is slightly more accurate
95
+ but significantly slower than the left3words model.
96
+
97
+ To train a simple model
98
+ =======================
99
+
100
+ java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -prop propertiesFile -model modelFile -trainFile trainingFile
101
+
102
+ To test a model
103
+ ===============
104
+
105
+ java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -prop propertiesFile -model modelFile -testFile testFile
106
+
107
+ Using models for French, German, and Spanish
108
+ ===========================================
109
+
110
+ Starting with version 4.0.0, French, German, and Spanish are tokenized according to the UD 2.0 standard. This includes creating
111
+ multiword tokens. This functionality requires the pipeline functionality only available in the full Stanford CoreNLP distribution.
112
+ To tag French, German, or Spanish, one must provide UD 2.0 tokenized text, or upgrade to the full Stanford CoreNLP package to get
113
+ UD 2.0 tokenization for these languages.
114
+
115
+ To run on pretokenized text, add "-tokenize false" to your command.
116
+
117
+ Example:
118
+
119
+ java -mx300m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -model models/french-ud.tagger -tokenize false -textFile sample-input.txt > sample-tagged.txt
120
+
121
+ CONTENTS
122
+ -----------------------------------------------
123
+ README.txt
124
+
125
+ This file.
126
+
127
+ LICENSE.txt
128
+
129
+ Stanford POS Tagger is licensed under the GNU General Public License (v2+).
130
+
131
+ stanford-postagger.jar
132
+ stanford-postagger-YYYY-MM-DD.jar
133
+
134
+ This is a JAR file containing all the Stanford classes necessary to
135
+ run the Stanford POS Tagger. The two jar files are identical. You can use
136
+ either the one with a version (date) indication or without, as you prefer.
137
+
138
+ src
139
+
140
+ A directory containing the Java 1.8 source code for the Stanford POS
141
+ Tagger distribution.
142
+
143
+ build.xml, Makefile
144
+
145
+ Files for building the distribution (with ant and make, respectively)
146
+
147
+ models
148
+
149
+ A directory containing trained POS taggers; the taggers end in ".tagger"
150
+ and the props file used to make the taggers end in ".props". The
151
+ ".props" files cannot be directly used on your own machine as they use
152
+ paths on the Stanford NLP machines, but they may serve as examples for
153
+ your own properties files. Included in the full version are other
154
+ English taggers, a German tagger, an Arabic tagger, and a Chinese
155
+ tagger. If you chose to download the smaller version of the tagger,
156
+ you have only two English taggers (left3words is faster but slightly
157
+ less accurate than bidirectional-distsim) - feel free to download any
158
+ other taggers you need from the POS tagger website. More information
159
+ about the models can be found in the README-Models.txt file in this
160
+ directory.
161
+
162
+ sample-input.txt
163
+
164
+ A sample text file that you can tag to demonstrate the tagger.
165
+
166
+ sample-output.txt
167
+
168
+ Tagged output of the tagger (using the left3words model)
169
+
170
+ stanford-postagger-gui.sh
171
+ stanford-postagger-gui.bat
172
+
173
+ Scripts for invoking the GUI demonstration version of the tagger.
174
+
175
+ stanford-postagger.sh
176
+ stanford-postagger.bat
177
+
178
+ Scripts for running the command-line version of the tagger.
179
+
180
+ javadoc
181
+
182
+ Javadocs for the distribution. In particular, look at the javadocs
183
+ for the class edu.stanford.nlp.tagger.maxent.MaxentTagger.
184
+
185
+ TaggerDemo.java
186
+
187
+ A sample file for how to call the tagger in your own program. You
188
+ should be able to compile and run it with:
189
+
190
+ javac -cp stanford-postagger.jar TaggerDemo.java
191
+ java -cp ".:stanford-postagger.jar" TaggerDemo models/wsj-0-18-left3words-distsim.tagger sample-input.txt
192
+
193
+ (If you are on Windows, you need to replace the ":" with a ";" in the
194
+ -cp argument, and should use a "\" in place of the "/" in the filename....)
195
+
196
+ THANKS
197
+ -----------------------------------------------
198
+
199
+ Thanks to the members of the Stanford Natural Language Processing Lab
200
+ for great collaborative work on Java libraries for natural language
201
+ processing.
202
+
203
+ http://nlp.stanford.edu/javanlp/
204
+
205
+ CHANGES
206
+ -----------------------------------------------
207
+
208
+ 2020-11-17 4.2.0 Add currency data for English models.
209
+
210
+ 2020-08-06 4.1.0 Add missing extractor, spanish tokenization
211
+ upgrades
212
+
213
+ 2020-05-22 4.0.0 Model tokenization updated to UDv2.0
214
+
215
+ 2018-10-16 3.9.2 New English models, better currency symbol
216
+ handling
217
+
218
+ 2018-02-27 3.9.1 new French UD model
219
+
220
+ 2017-06-09 3.8.0 new Spanish and French UD models
221
+
222
+ 2016-10-31 3.7.0 Update for compatibility, German UD model
223
+
224
+ 2015-12-09 3.6.0 Updated for compatibility
225
+
226
+ 2015-04-20 3.5.2 Update for compatibility
227
+
228
+ 2015-01-29 3.5.1 General bugfixes
229
+
230
+ 2014-10-26 3.5.0 Upgrade to Java 1.8
231
+
232
+ 2014-08-27 3.4.1 Add Spanish models
233
+
234
+ 2014-06-16 3.4 Using CC tagset for French
235
+
236
+ 2014-01-04 3.3.1 Bugfix release
237
+
238
+ 2013-11-12 3.3.0 Add imperatives to English training data
239
+
240
+ 2013-06-19 3.2.0 Decrease size and improve speed of tagger
241
+ models for all languages
242
+
243
+ 2013-04-04 3.1.5 Speed improvements, ctb7 model, -nthreads
244
+ option
245
+
246
+ 2012-11-11 3.1.4 Updated Chinese model
247
+
248
+ 2012-07-09 3.1.3 Minor bug fixes
249
+
250
+ 2012-05-22 3.1.2 Updated for compatibility with other releases
251
+
252
+ 2012-03-09 3.1.1 Caseless models added
253
+
254
+ 2012-01-06 3.1.0 French tagger added, tagging speed improved
255
+
256
+ 2011-09-14 3.0.4 Updated for compatibility with other releases
257
+
258
+ 2011-06-15 3.0.3 Updated for compatibility with other releases
259
+
260
+ 2011-05-15 3.0.2 Can read training files in TSV format
261
+
262
+ 2011-04-17 3.0.1 Improved German and Arabic models
263
+ Compatible with other Stanford releases
264
+
265
+ 2010-05-21 3.0.0 Re-entrant
266
+
267
+ LICENSE
268
+ -----------------------------------------------
269
+
270
+ Stanford POS Tagger
271
+ Copyright (c) 2002-2010 The Board of Trustees of
272
+ The Leland Stanford Junior University. All Rights Reserved.
273
+
274
+ This program is free software; you can redistribute it and/or
275
+ modify it under the terms of the GNU General Public License
276
+ as published by the Free Software Foundation; either version 2
277
+ of the License, or (at your option) any later version.
278
+
279
+ This program is distributed in the hope that it will be useful,
280
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
281
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
282
+ GNU General Public License for more details.
283
+
284
+ You should have received a copy of the GNU General Public License
285
+ along with this program. If not, see http://www.gnu.org/licenses/ .
286
+
287
+ For more information, bug reports, fixes, contact:
288
+ Christopher Manning
289
+ Dept of Computer Science, Gates 2A
290
+ Stanford CA 94305-9020
291
+ USA
292
+ Support/Questions: [email protected]
293
+ Licensing: [email protected]
294
+ http://nlp.stanford.edu/software/tagger.html
295
+
296
+
297
+ CONTACT
298
+ -----------------------------------------------
299
+
300
+ For questions about the Stanford POS tagger, please feel free to contact
301
+ the Stanford JavaNLP user community at the mailing list
302
+ [email protected]. You need to be a member of this
303
+ mailing list to be able to post to it. Join the list either by emailing
304
+ [email protected] (leave the subject and message
305
+ body empty) or by using the web interface at:
306
+
307
+ https://mailman.stanford.edu/mailman/listinfo/java-nlp-user
308
+
309
+ This is the best list to post to in order to ask questions, make
310
+ announcements, or for discussion among Stanford JavaNLP tool users. We
311
+ provide assistance on a best-effort basis. You can also look at the list
312
+ archives via https://mailman.stanford.edu/pipermail/java-nlp-user/. For
313
+ licensing questions, please see the tagger webpage or contact Stanford
314
+ JavaNLP at [email protected].
315
+
CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo.java ADDED
@@ -0,0 +1,32 @@
1
+ import edu.stanford.nlp.util.logging.Redwood;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.FileReader;
5
+ import java.util.List;
6
+
7
+ import edu.stanford.nlp.ling.SentenceUtils;
8
+ import edu.stanford.nlp.ling.TaggedWord;
9
+ import edu.stanford.nlp.ling.HasWord;
10
+ import edu.stanford.nlp.tagger.maxent.MaxentTagger;
11
+
12
+ public class TaggerDemo {
13
+
14
+ /** A logger for this class */
15
+ private static Redwood.RedwoodChannels log = Redwood.channels(TaggerDemo.class);
16
+
17
+ private TaggerDemo() {}
18
+
19
+ public static void main(String[] args) throws Exception {
20
+ if (args.length != 2) {
21
+ log.info("usage: java TaggerDemo modelFile fileToTag");
22
+ return;
23
+ }
24
+ MaxentTagger tagger = new MaxentTagger(args[0]);
25
+ List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(args[1])));
26
+ for (List<HasWord> sentence : sentences) {
27
+ List<TaggedWord> tSentence = tagger.tagSentence(sentence);
28
+ System.out.println(SentenceUtils.listToString(tSentence, false));
29
+ }
30
+ }
31
+
32
+ }
CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo2.java ADDED
@@ -0,0 +1,62 @@
1
+ import edu.stanford.nlp.util.logging.Redwood;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.FileInputStream;
5
+ import java.io.InputStreamReader;
6
+ import java.io.OutputStreamWriter;
7
+ import java.io.PrintWriter;
8
+ import java.util.List;
9
+
10
+ import edu.stanford.nlp.ling.SentenceUtils;
11
+ import edu.stanford.nlp.ling.TaggedWord;
12
+ import edu.stanford.nlp.ling.HasWord;
13
+ import edu.stanford.nlp.ling.CoreLabel;
14
+ import edu.stanford.nlp.process.CoreLabelTokenFactory;
15
+ import edu.stanford.nlp.process.DocumentPreprocessor;
16
+ import edu.stanford.nlp.process.PTBTokenizer;
17
+ import edu.stanford.nlp.process.TokenizerFactory;
18
+ import edu.stanford.nlp.tagger.maxent.MaxentTagger;
19
+
20
+ /** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
21
+ * being tagged by the tagger. The sentences are generated by direct use
22
+ * of the DocumentPreprocessor class.
23
+ *
24
+ * @author Christopher Manning
25
+ */
26
+ public class TaggerDemo2 {
27
+
28
+ /** A logger for this class */
29
+ private static Redwood.RedwoodChannels log = Redwood.channels(TaggerDemo2.class);
30
+
31
+ private TaggerDemo2() {}
32
+
33
+ public static void main(String[] args) throws Exception {
34
+ if (args.length != 2) {
35
+ log.info("usage: java TaggerDemo2 modelFile fileToTag");
36
+ return;
37
+ }
38
+ MaxentTagger tagger = new MaxentTagger(args[0]);
39
+ TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
40
+ "untokenizable=noneKeep");
41
+ BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
42
+ PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
43
+ DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
44
+ documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
45
+ for (List<HasWord> sentence : documentPreprocessor) {
46
+ List<TaggedWord> tSentence = tagger.tagSentence(sentence);
47
+ pw.println(SentenceUtils.listToString(tSentence, false));
48
+ }
49
+
50
+ // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
51
+ List<HasWord> sent = SentenceUtils.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
52
+ List<TaggedWord> taggedSent = tagger.tagSentence(sent);
53
+ for (TaggedWord tw : taggedSent) {
54
+ if (tw.tag().startsWith("JJ")) {
55
+ pw.println(tw.word());
56
+ }
57
+ }
58
+
59
+ pw.close();
60
+ }
61
+
62
+ }
CoreNLP/stanford-postagger-full-2020-11-17/build.xml ADDED
@@ -0,0 +1,201 @@
1
+ <!-- build.xml file for ant for JavaNLP -->
2
+
3
+ <!-- Before using this, unjar the sources' jar file into the src/ directory! -->
4
+
5
+ <!-- A "project" describes a set of targets that may be requested
6
+ when Ant is executed. The "default" attribute defines the
7
+ target which is executed if no specific target is requested,
8
+ and the "basedir" attribute defines the current working directory
9
+ from which Ant executes the requested task. This is normally
10
+ set to the current working directory.
11
+ -->
12
+
13
+ <project name="JavaNLP" default="compile" basedir=".">
14
+
15
+ <property name="build.home" value="${basedir}/classes"/>
16
+ <property name="build.tests" value="${basedir}/classes"/>
17
+ <property name="docs.home" value="${basedir}/docs"/>
18
+ <property name="src.home" value="${basedir}/src"/>
19
+ <property name="javadoc.home" value="${basedir}/javadoc"/>
20
+
21
+
22
+ <!-- ==================== Compilation Control Options ==================== -->
23
+
24
+ <!--
25
+
26
+ These properties control option settings on the Javac compiler when it
27
+ is invoked using the <javac> task.
28
+
29
+ compile.debug Should compilation include the debug option?
30
+
31
+ compile.deprecation Should compilation include the deprecation option?
32
+
33
+ compile.optimize Should compilation include the optimize option?
34
+
35
+ compile.source Source version compatibility
36
+
37
+ compile.target Target class version compatibility
38
+
39
+ -->
40
+
41
+ <property name="compile.debug" value="true"/>
42
+ <property name="compile.deprecation" value="false"/>
43
+ <property name="compile.optimize" value="true"/>
44
+ <property name="compile.source" value="1.8" />
45
+ <property name="compile.target" value="1.8" />
46
+ <property name="compile.encoding" value="utf-8" />
47
+
48
+
49
+
50
+
51
+ <!-- ==================== All Target ====================================== -->
52
+
53
+ <!--
54
+
55
+ The "all" target is a shortcut for running the "clean" target followed
56
+ by the "compile" target, to force a complete recompile.
57
+
58
+ -->
59
+
60
+ <target name="all" depends="clean,compile"
61
+ description="Clean build and dist directories, then compile"/>
62
+
63
+
64
+
65
+ <!-- ==================== Clean Target ==================================== -->
66
+
67
+ <!--
68
+
69
+ The "clean" target deletes any previous "build" and "dist" directory,
70
+ so that you can be ensured the application can be built from scratch.
71
+
72
+ -->
73
+
74
+ <target name="clean" description="Delete old classes">
75
+ <delete dir="${build.home}/edu"/>
76
+ </target>
77
+
78
+
79
+
80
+ <!-- ==================== Compile Target ================================== -->
81
+
82
+ <!--
83
+
84
+ The "compile" target transforms source files (from your "src" directory)
85
+ into object files in the appropriate location in the build directory.
86
+ This example assumes that you will be including your classes in an
87
+ unpacked directory hierarchy under "/WEB-INF/classes".
88
+
89
+ -->
90
+
91
+ <target name="compile" depends="prepare"
92
+ description="Compile Java sources">
93
+
94
+ <!-- Compile Java classes as necessary -->
95
+ <mkdir dir="${build.home}"/>
96
+ <javac srcdir="${src.home}"
97
+ destdir="${build.home}"
98
+ debug="${compile.debug}"
99
+ encoding="${compile.encoding}"
100
+ deprecation="${compile.deprecation}"
101
+ optimize="${compile.optimize}"
102
+ source="${compile.source}"
103
+ target="${compile.target}"
104
+ includeantruntime="false">
105
+ <compilerarg value="-Xmaxerrs"/>
106
+ <compilerarg value="20"/>
107
+ <!-- <compilerarg value="-Xlint"/> -->
108
+ </javac>
109
+
110
+ <!-- Copy application resources -->
111
+ <!--
112
+ <copy todir="${build.home}/WEB-INF/classes">
113
+ <fileset dir="${src.home}" excludes="**/*.java"/>
114
+ </copy>
115
+ -->
116
+
117
+ </target>
118
+
119
+
120
+ <!-- ==================== Jar Target ================================== -->
121
+
122
+ <!--
123
+ The "jar" target recreates the jar file, which you may want to do if
124
+ you take apart the source jar and change something.
125
+ -->
126
+
127
+ <property name="jar.output" value="stanford-postagger.jar" />
128
+ <property name="jar.mainclass" value="edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI" />
129
+
130
+ <target name="jar" depends="compile" description="Build a jar file">
131
+ <jar destfile="${jar.output}">
132
+ <fileset dir="${build.home}"/>
133
+ <manifest>
134
+ <attribute name="Main-Class" value="${jar.mainclass}"/>
135
+ </manifest>
136
+ </jar>
137
+ </target>
138
+
139
+ <!-- ==================== Javadoc Target ================================== -->
140
+
141
+ <!--
142
+
143
+ The "javadoc" target creates Javadoc API documentation for the Java
144
+ classes included in your application. Normally, this is only required
145
+ when preparing a distribution release, but is available as a separate
146
+ target in case the developer wants to create Javadocs independently.
147
+
148
+ -->
149
+
150
+ <target name="javadoc" depends="compile"
151
+ description="Create Javadoc API documentation">
152
+
153
+ <mkdir dir="${javadoc.home}"/>
154
+ <javadoc sourcepath="${src.home}"
155
+ destdir="${javadoc.home}"
156
+ maxmemory="1g"
157
+ author="true"
158
+ source="${compile.source}"
159
+ overview="${src.home}/edu/stanford/nlp/overview.html"
160
+ doctitle="Stanford JavaNLP API Documentation"
161
+ windowtitle="Stanford JavaNLP API"
162
+ encoding="${compile.encoding}"
163
+ docencoding="${compile.encoding}"
164
+ charset="${compile.encoding}"
165
+ packagenames="*">
166
+ <!-- Allow @generated, @modifiable and @ordered tags -->
167
+ <tag name="generated" scope="all" description="Generated" />
168
+ <tag name="modifiable" scope="all" description="Modifiable" />
169
+ <tag name="ordered" scope="all" description="Ordered" />
170
+ <!-- Depends on lib and classes folders -->
171
+ <classpath>
172
+ <pathelement path="${build.home}" />
173
+ </classpath>
174
+ <bottom><![CDATA[<font size="2"><a href="https://nlp.stanford.edu" target="_top">Stanford NLP Group</a></font>]]></bottom>
175
+ <link href="https://docs.oracle.com/javase/8/docs/api/"/>
176
+ </javadoc>
177
+
178
+ </target>
179
+
180
+
181
+ <!-- ==================== Prepare Target ================================== -->
182
+
183
+ <!--
184
+
185
+ The "prepare" target is used to create the "build" destination directory,
186
+ and copy the static contents of your web application to it. If you need
187
+ to copy static files from external dependencies, you can customize the
188
+ contents of this task.
189
+
190
+ Normally, this task is executed indirectly when needed.
191
+
192
+ -->
193
+
194
+ <target name="prepare">
195
+
196
+ <!-- Create build directories as needed -->
197
+ <mkdir dir="${build.home}"/>
198
+
199
+ </target>
200
+
201
+ </project>
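For reference, the targets defined above are driven with Apache Ant in the usual way. A minimal sketch, assuming Ant is installed and that the sources jar has first been unpacked into src/ as the comment at the top of the file requires:

    # unpack the sources, then rebuild the classes and the jar
    mkdir -p src && (cd src && jar xf ../stanford-postagger-4.2.0-sources.jar)
    ant compile    # compiles into classes/ (the "prepare" target runs first)
    ant jar        # rebuilds stanford-postagger.jar with MaxentTaggerGUI as Main-Class
    ant javadoc    # optional: generates API docs into javadoc/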
CoreNLP/stanford-postagger-full-2020-11-17/data/enclitic-inflections.data ADDED
The diff for this file is too large to render.
 
CoreNLP/stanford-postagger-full-2020-11-17/models/README-Models.txt ADDED
@@ -0,0 +1,68 @@
+ Stanford POS Tagger, v4.2.0 - 2020-11-17
+ Copyright (c) 2002-2020 The Board of Trustees of
+ The Leland Stanford Junior University. All Rights Reserved.
+
+ This document contains (some) information about the models included in
+ this release and that may be downloaded from the POS tagger website at
+ http://nlp.stanford.edu/software/tagger.html . All of the models mentioned
+ in this document are in the downloaded package in the same directory as this
+ readme. All taggers are accompanied by the props files used to create
+ them; please examine these files for more detailed information about the
+ creation of the taggers.
+
+ For English, the bidirectional taggers are slightly more accurate, but
+ tag much more slowly; choose the appropriate tagger based on your
+ speed/performance needs.
+
+ English taggers
+ ---------------------------
+ english-left3words-distsim.tagger
+ Trained on WSJ sections 0-18 and extra parser training data using the
+ left3words architecture and includes word shape and distributional
+ similarity features. Penn tagset. UDv2.0 tokenization standard.
+
+ english-bidirectional-distsim.tagger
+ Trained on WSJ sections 0-18 using a bidirectional architecture and
+ including word shape and distributional similarity features.
+ Penn Treebank tagset. UDv2.0 tokenization standard.
+
+ english-caseless-left3words-distsim.tagger
+ Trained on WSJ sections 0-18 and extra parser training data using the
+ left3words architecture and includes word shape and distributional
+ similarity features. Penn tagset. Ignores case. UDv2.0 tokenization
+ standard.
+
+
+ Chinese tagger
+ ---------------------------
+ chinese-nodistsim.tagger
+ Trained on a combination of CTB7 texts from Chinese and Hong Kong
+ sources.
+ LDC Chinese Treebank POS tag set.
+
+ chinese-distsim.tagger
+ Trained on a combination of CTB7 texts from Chinese and Hong Kong
+ sources with distributional similarity clusters.
+ LDC Chinese Treebank POS tag set.
+
+ Arabic tagger
+ ---------------------------
+ arabic.tagger
+ Trained on the *entire* ATB p1-3.
+ When trained on the train part of the ATB p1-3 split done for the 2005
+ JHU Summer Workshop (Diab split), using (augmented) Bies tags, it gets
+
+ French tagger
+ ---------------------------
+ french-ud.tagger
+ Trained on the French GSD (UDv2.2) data set
+
+ German tagger
+ ---------------------------
+ german-ud.tagger
+ Trained on the German GSD (UDv2.2) data set
+
+ Spanish tagger
+ --------------------------
+ spanish-ud.tagger
+ Trained on the Spanish AnCora (UDv2.0) data set
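As a concrete illustration of using one of the models listed above, the following command tags the bundled sample text from the command line; it mirrors the stanford-postagger.sh script included later in this commit (swap in any other model file from this directory as needed):

    java -mx300m -cp 'stanford-postagger.jar:' edu.stanford.nlp.tagger.maxent.MaxentTagger \
      -model models/english-left3words-distsim.tagger -textFile sample-input.txt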
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0ea63d5a2b78cc06565accfaac5abdd64b031a422d0e09e189594098671e5e6
+ size 2577597
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Wed Jan 03 12:02:30 PST 2018 with arguments:
+ model = arabic-train.tagger
+ arch = words(-2,2),order(1),prefix(6),suffix(6),unicodeshapes(1)
+ wordFunction =
+ trainFile = format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Train.utf8.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 1
+ debug = false
+ debugPrefix =
+ tagSeparator = /
+ encoding = UTF-8
+ iterations = 100
+ lang = arabic
+ learnClosedClassTags = false
+ minFeatureThresh = 3
+ openClassTags =
+ rareWordMinFeatureThresh = 3
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = false
+ tokenizerFactory = edu.stanford.nlp.process.WhitespaceTokenizer
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
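Each .props file in this directory records the exact configuration used to train the corresponding model. As a hedged sketch, a model can be retrained from such a file along the lines below, assuming the Stanford-internal /u/nlp/... training paths inside the file are first replaced with data available locally and the heap size is adjusted to the corpus (see the package README.txt for the authoritative training instructions):

    # retrain a tagger from a properties file (training data paths must be edited first)
    java -mx4g -cp stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -props arabic-train.tagger.props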
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d07351166e384349a0e1a5d1a8b2987f88a5dcbad9bfb44e24444028802dbd41
+ size 2944508
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Wed Jan 03 12:24:48 PST 2018 with arguments:
+ model = arabic.tagger
+ arch = words(-2,2),order(1),prefix(6),suffix(6),unicodeshapes(1)
+ wordFunction =
+ trainFile = format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Train.utf8.txt;format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Dev.utf8.txt;format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Test.utf8.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 1
+ debug = false
+ debugPrefix =
+ tagSeparator = /
+ encoding = UTF-8
+ iterations = 100
+ lang = arabic
+ learnClosedClassTags = false
+ minFeatureThresh = 3
+ openClassTags =
+ rareWordMinFeatureThresh = 3
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = false
+ tokenizerFactory = edu.stanford.nlp.process.WhitespaceTokenizer
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca40fdab701679ac93e9f075da134e2906f3d1a88b084bc0c018506fa7a68e4b
+ size 13866197
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Fri Feb 14 01:19:49 PST 2014 with arguments:
+ model = chinese-distsim.tagger
+ arch = generic,suffix(4),prefix(4),unicodeshapes(-1,1),unicodeshapeconjunction(-1,1),words(-2,-2),words(2,2),distsim(/u/nlp/data/chinese/distsim/xin_cmn_2000-2010.ldc.seg.utf8.1M.random-c1000,-1,1),distsimconjunction(/u/nlp/data/chinese/distsim/xin_cmn_2000-2010.ldc.seg.utf8.1M.random-c1000,-1,1)
+ wordFunction = edu.stanford.nlp.util.UTF8EquivalenceFunction
+ trainFile = format=TREES,/u/nlp/data/chinese/ctb7/train.mrg
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 1
+ debug = false
+ debugPrefix =
+ tagSeparator = #
+ encoding = utf-8
+ iterations = 100
+ lang = chinese
+ learnClosedClassTags = false
+ minFeatureThresh = 3
+ openClassTags =
+ rareWordMinFeatureThresh = 3
+ rareWordThresh = 20
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = false
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db4178289b565a439f4bd2f2216d770e37f8ae568dcc34b1278c1489d812a8ca
+ size 3963731
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Fri Feb 14 02:20:03 PST 2014 with arguments:
+ model = chinese-nodistsim.tagger
+ arch = generic,suffix(4),prefix(4),unicodeshapes(-1,1),unicodeshapeconjunction(-1,1),words(-2,-2),words(2,2)
+ wordFunction = edu.stanford.nlp.util.UTF8EquivalenceFunction
+ trainFile = format=TREES,/u/nlp/data/chinese/ctb7/train.mrg
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 1
+ debug = false
+ debugPrefix =
+ tagSeparator = #
+ encoding = utf-8
+ iterations = 100
+ lang = chinese
+ learnClosedClassTags = false
+ minFeatureThresh = 3
+ openClassTags =
+ rareWordMinFeatureThresh = 3
+ rareWordThresh = 20
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = false
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a363a88c67b25e793a9382254485d897e95b0e166cc34af314ec3d53d79899b1
+ size 20045086
CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger.props ADDED
@@ -0,0 +1,35 @@
+ model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-bidirectional-distsim-prod1.tagger
+ arch = bidirectional5words,allwordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUCase),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CompanyNameDetector),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorAllCapitalized),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUpperDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorStartSentenceCap),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCapC),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCap),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
+ trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/handparsed-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/train-tech-english.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = UTF-8
+ iterations = 100
+ lang = english
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 5
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.5
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
+ minWordsLockTags = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f57ba3bca617dc8732b7d1c66f16a0f147cccbc19315938c0e03a6d4569c0488
+ size 13937235
CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger.props ADDED
@@ -0,0 +1,36 @@
+ ## tagger training invoked at Sat Feb 08 23:44:06 PST 2020 with arguments:
+ model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-caseless-left3words-distsim-prod2.tagger
+ arch = left3words,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CaselessCompanyNameDetector),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
+ trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/train-tech-english.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/handparsed-train.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = UTF-8
+ iterations = 100
+ lang = english
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 5
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
+ minWordsLockTags = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ebb5f7454da95775ecdb3ee20d3c58488cd87aa9999585951645f949e962089f
+ size 15198877
CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger.props ADDED
@@ -0,0 +1,35 @@
+ model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-left3words-distsim-prod1.tagger
+ arch = left3words,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUCase),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CompanyNameDetector),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorAllCapitalized),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUpperDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorStartSentenceCap),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCapC),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCap),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
+ trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/handparsed-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/train-tech-english.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = UTF-8
+ iterations = 100
+ lang = english
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 5
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.5
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
+ minWordsLockTags = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b71ac1193d406f368d0525edc2ab295e7138c184c06a27f6363caa88429d4526
+ size 1591008
CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Mon Jul 01 00:54:56 PDT 2019 with arguments:
+ model = french-ud.tagger
+ arch = left3words,naacl2003unknowns,unicodeshapes(-1,1)
+ wordFunction =
+ trainFile = format=TSV,wordColumn=1,tagColumn=3,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/fr_gsd-ud-train.conllu.clean
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = utf-8
+ iterations = 100
+ lang = french
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 10
+ rareWordThresh = 5
+ search = owlqn2
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions = asciiQuotes
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1537adaf468f4005882207c88fb3a7eda93d82764b24d1de7900776131fc8878
+ size 71524450
CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Mon Jul 01 01:05:00 PDT 2019 with arguments:
+ model = german-ud.tagger
+ arch = left3words,naacl2003unknowns,unicodeshapes(-2,2),distsim(/u/nlp/data/german/ner/hgc_175m_600,-1,1),distsimconjunction(/u/nlp/data/german/ner/hgc_175m_600,-1,1),unicodeshapeconjunction(-1,1)
+ wordFunction =
+ trainFile = format=TSV,wordColumn=1,tagColumn=3,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/de_gsd-ud-train.conllu.clean
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = utf-8
+ iterations = 100
+ lang = german
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 10
+ rareWordThresh = 5
+ search = owlqn2
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.625
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions = asciiQuotes
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5dba9b09f6cb0e0f58ee0d694ad5c920dec3a08c89c952c64fa52b67011e7e5d
+ size 9443457
CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Mon Jul 01 01:04:01 PDT 2019 with arguments:
+ model = spanish-ud.tagger
+ arch = left3words,naacl2003unknowns,allwordshapes(-1,1),distsim(/u/nlp/data/spanish/distsim/spanish.spence512.cls,-1,1),distsimconjunction(/u/nlp/data/spanish/distsim/spanish.spence512.cls,-1,1)
+ wordFunction =
+ trainFile = format=TSV,wordColumn=1,tagColumn=4,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/es_ancora-ud-train.conllu.clean
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = utf-8
+ iterations = 100
+ lang = spanish
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 10
+ rareWordThresh = 5
+ search = owlqn2
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions = asciiQuotes
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 16
CoreNLP/stanford-postagger-full-2020-11-17/sample-input.txt ADDED
@@ -0,0 +1,6 @@
+ A passenger plane has crashed shortly after take-off from Kyrgyzstan's
+ capital, Bishkek, killing a large number of those on board. The head of
+ Kyrgyzstan's civil aviation authority said that out of about 90
+ passengers and crew, only about 20 people have survived. The Itek Air
+ Boeing 737 took off bound for Mashhad, in north-eastern Iran, but turned
+ round some 10 minutes later.
CoreNLP/stanford-postagger-full-2020-11-17/sample-output.txt ADDED
@@ -0,0 +1,3 @@
+ A_DT passenger_NN plane_NN has_VBZ crashed_VBN shortly_RB after_IN take-off_NN from_IN Kyrgyzstan_NNP 's_POS capital_NN ,_, Bishkek_NNP ,_, killing_VBG a_DT large_JJ number_NN of_IN those_DT on_IN board_NN ._.
+ The_DT head_NN of_IN Kyrgyzstan_NNP 's_POS civil_JJ aviation_NN authority_NN said_VBD that_IN out_IN of_IN about_IN 90_CD passengers_NNS and_CC crew_NN ,_, only_RB about_IN 20_CD people_NNS have_VBP survived_VBN ._.
+ The_DT Itek_NNP Air_NNP Boeing_NNP 737_CD took_VBD off_RP bound_VBN for_IN Mashhad_NNP ,_, in_IN north-eastern_JJ Iran_NNP ,_, but_CC turned_VBD round_NN some_DT 10_CD minutes_NNS later_RB ._.
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:197a84a195a7fbdef461144b07f8b7475b7145f3643a45f5ead295367fe6d323
+ size 4429314
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e7f56b55bd3ec2a5236c7a57b28923bce6a2d72d9faa5ab6fa98309ff5f25e4
+ size 2919886
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6090106c57da13d2ac8a1b2798dd7f437e07a9909a00f917e884bf6fa52fc8d
+ size 3650039
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.bat ADDED
@@ -0,0 +1,3 @@
+ :: runs the POS tagger (toy) GUI
+ :: usage stanford-postagger-gui
+ java -mx200m -cp "stanford-postagger.jar;" edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.sh ADDED
@@ -0,0 +1,2 @@
+ #!/bin/sh
+ java -mx200m -cp 'stanford-postagger.jar:' edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.bat ADDED
@@ -0,0 +1,4 @@
+ :: usage: stanford-postagger model textFile
+ :: e.g., stanford-postagger models\english-left3words-distsim.tagger sample-input.txt
+
+ java -mx300m -cp "stanford-postagger.jar;" edu.stanford.nlp.tagger.maxent.MaxentTagger -model %1 -textFile %2
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6090106c57da13d2ac8a1b2798dd7f437e07a9909a00f917e884bf6fa52fc8d
+ size 3650039
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.sh ADDED
@@ -0,0 +1,6 @@
+ #!/bin/sh
+ #
+ # usage: ./stanford-postagger.sh model textFile
+ # e.g., ./stanford-postagger.sh models/english-left3words-distsim.tagger sample-input.txt
+
+ java -mx300m -cp 'stanford-postagger.jar:' edu.stanford.nlp.tagger.maxent.MaxentTagger -model $1 -textFile $2
UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll ADDED
Binary file (76.8 kB).
 
UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll.config ADDED
@@ -0,0 +1,83 @@
+ <?xml version="1.0" encoding="utf-8"?>
+ <configuration>
+ <runtime>
+ <assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
+ <dependentAssembly>
+ <assemblyIdentity name="System.Diagnostics.DiagnosticSource" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.3.1" newVersion="4.0.3.1" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Polenter.SharpSerializer" publicKeyToken="8f4f20011571ee5f" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-3.0.1.0" newVersion="3.0.1.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Telerik.Windows.Documents.Flow" publicKeyToken="5803cfa389c90ce7" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-2018.2.511.40" newVersion="2018.2.511.40" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Telerik.Windows.Documents.Core" publicKeyToken="5803cfa389c90ce7" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-2018.2.511.40" newVersion="2018.2.511.40" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Newtonsoft.Json" publicKeyToken="30ad4fe6b2a6aeed" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-13.0.0.0" newVersion="13.0.0.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="PostSharp" publicKeyToken="b13fd38b8f9c99d7" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="PostSharp.Patterns.Aggregation" publicKeyToken="e7f631e6ce13f078" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="PostSharp.Patterns.Common" publicKeyToken="e7f631e6ce13f078" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Runtime.CompilerServices.Unsafe" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-6.0.0.0" newVersion="6.0.0.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="K4os.Hash.xxHash" publicKeyToken="32cd54395057cec3" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-1.0.8.0" newVersion="1.0.8.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Memory" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.1.2" newVersion="4.0.1.2" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Net.Http" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.1.0.3" newVersion="4.1.0.3" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Buffers" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.3.0" newVersion="4.0.3.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.IO.Pipelines" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-8.0.0.0" newVersion="8.0.0.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Microsoft.Bcl.AsyncInterfaces" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-8.0.0.0" newVersion="8.0.0.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Security.Cryptography.X509Certificates" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.1.1.2" newVersion="4.1.1.2" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Microsoft.Win32.Primitives" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.1.0" newVersion="4.0.1.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Security.Cryptography.Encoding" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.1.0" newVersion="4.0.1.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.IO.Compression" publicKeyToken="b77a5c561934e089" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.1.2.0" newVersion="4.1.2.0" />
+ </dependentAssembly>
+ </assemblyBinding>
+ </runtime>
+ <startup><supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" /></startup></configuration>
UDPipe/v1/XDependencies/UDPipe/AUTHORS ADDED
@@ -0,0 +1,2 @@
+ Milan Straka <[email protected]>
+ Jana Straková <[email protected]>