CoreNLP, UDPipe
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +104 -0
- CoreNLP/models/ru/parser/nndep.rus.model81_mf.txt.gz +3 -0
- CoreNLP/models/ru/parser/nndep.rus.model90.9_88.6.txt.gz +3 -0
- CoreNLP/models/ru/parser/nndep.rus.modelAr100HS400.txt.gz +3 -0
- CoreNLP/models/ru/parser/nndep.rus.modelMFAr100HS400_81.txt.gz +3 -0
- CoreNLP/models/ru/parser/nndep.rus.modelMFWiki100HS400_80.txt.gz +3 -0
- CoreNLP/models/ru/tagger/dict.tsv +3 -0
- CoreNLP/models/ru/tagger/russian-ud-mf.tagger +3 -0
- CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger +3 -0
- CoreNLP/models/ru/tagger/russian-ud-pos.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/LICENSE.txt +339 -0
- CoreNLP/stanford-postagger-full-2020-11-17/README.txt +315 -0
- CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo.java +32 -0
- CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo2.java +62 -0
- CoreNLP/stanford-postagger-full-2020-11-17/build.xml +201 -0
- CoreNLP/stanford-postagger-full-2020-11-17/data/enclitic-inflections.data +0 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/README-Models.txt +68 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger.props +36 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger.props +35 -0
- CoreNLP/stanford-postagger-full-2020-11-17/sample-input.txt +6 -0
- CoreNLP/stanford-postagger-full-2020-11-17/sample-output.txt +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.bat +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.sh +2 -0
- CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.bat +4 -0
- CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar +3 -0
- CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.sh +6 -0
- UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll +0 -0
- UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll.config +83 -0
- UDPipe/v1/XDependencies/UDPipe/AUTHORS +2 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,107 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
CoreNLP/models/ru/tagger/dict.tsv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
CoreNLP/models/ru/tagger/russian-ud-mf.tagger filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
CoreNLP/models/ru/tagger/russian-ud-pos.tagger filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
UDPipe/v1/binary/linux32/csharp/libudpipe_csharp.so filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
UDPipe/v1/binary/linux32/java/libudpipe_java.so filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
UDPipe/v1/binary/linux32/udpipe filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
UDPipe/v1/binary/linux64/csharp/libudpipe_csharp.so filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
UDPipe/v1/binary/linux64/java/libudpipe_java.so filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
UDPipe/v1/binary/linux64/udpipe filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
UDPipe/v1/binary/linuxArmv7/libudpipe.a filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
UDPipe/v1/binary/linuxArmv7/rest_server/udpipe_server filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
UDPipe/v1/binary/linuxArmv7/udpipe filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
UDPipe/v1/binary/osx/csharp/libudpipe_csharp.dylib filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
UDPipe/v1/binary/osx/java/libudpipe_java.dylib filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
UDPipe/v1/binary/osx/udpipe filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
UDPipe/v1/binary/win64/csharp/udpipe_csharp.dll filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
UDPipe/v1/binary/win64/java/udpipe_java.dll filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
UDPipe/v1/binary/win64/udpipe.exe filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
UDPipe/v1/udpipe_csharp/x64/udpipe_csharp.dll filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
UDPipe/v1/XDependencies/UDPipe/MANUAL.pdf filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Ancient_greek-PROIEL.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Ancient_greek.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Arabic.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Basque.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Belarusian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Bulgarian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Catalan.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Chinese.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Coptic.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Croatian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Czech-CAC.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Czech-CLTT.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Czech.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Danish.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Dutch-LASSYSMALL.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Dutch.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
UDPipe/v1/XDependencies/UDPipe/Models/English-LINES.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
UDPipe/v1/XDependencies/UDPipe/Models/English-PARTUT.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
UDPipe/v1/XDependencies/UDPipe/Models/English.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Estonian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Finnish-FTB.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Finnish.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
UDPipe/v1/XDependencies/UDPipe/Models/French-PARTUT.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
UDPipe/v1/XDependencies/UDPipe/Models/French-SEQUOIA.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
UDPipe/v1/XDependencies/UDPipe/Models/French.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Galician-TREEGAL.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Galician.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
UDPipe/v1/XDependencies/UDPipe/Models/German.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Gothic.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Greek.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Hebrew.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Hindi.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Hungarian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Indonesian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Irish.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Italian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Japanese.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Kazakh.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Korean.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Latin-ITTB.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Latin-PROIEL.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Latin.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Latvian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Lithuanian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Norwegian-BOKMAAL.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Norwegian-NYNORSK.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Old_church_slavonic.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Persian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Polish.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Portuguese-BR.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Portuguese.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Romanian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Russian-SYNTAGRUS.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Russian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Sanskrit.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Slovak.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Slovenian-SST.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Slovenian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Spanish-ANCORA.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Spanish.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Swedish-LINES.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Swedish.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Tamil.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Turkish.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Ukrainian.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Urdu.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Uyghur.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
UDPipe/v1/XDependencies/UDPipe/Models/Vietnamese.udpipe filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
UDPipe/v1/XDependencies/UDPipe/udpipe.exe filter=lfs diff=lfs merge=lfs -text
|
CoreNLP/models/ru/parser/nndep.rus.model81_mf.txt.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:326fc20cdd0602da6ea44279f4d04761670bb03d86297a9be59d8c921bd11580
|
| 3 |
+
size 115029311
|
CoreNLP/models/ru/parser/nndep.rus.model90.9_88.6.txt.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f8e0cb78116c4fba174b6ab19dccc7867b7cea2dd44ccea7067ee949cb04e69
|
| 3 |
+
size 118576259
|
CoreNLP/models/ru/parser/nndep.rus.modelAr100HS400.txt.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91b09c5d84c44ca668372b78f8c20235fb3fe701df25169dd0182e4830043b29
|
| 3 |
+
size 127444031
|
CoreNLP/models/ru/parser/nndep.rus.modelMFAr100HS400_81.txt.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e5ea42afc93a9f9a6c7c915451d1257e6455faaa3f01b06bd1643af3bafaff2
|
| 3 |
+
size 118710465
|
CoreNLP/models/ru/parser/nndep.rus.modelMFWiki100HS400_80.txt.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8afeae32c7675e237f5cef4f225e9ed92d8c66b80e37b06fe710cfcb6f558617
|
| 3 |
+
size 119027293
|
CoreNLP/models/ru/tagger/dict.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01234d489dc349b61843573e725f2fd3e265e004a1408a6712c5f6fafca5d506
|
| 3 |
+
size 129696832
|
CoreNLP/models/ru/tagger/russian-ud-mf.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd10da6d84ec2f8d150d48147dac16d2209b2bd09af0e9740c7fa18abe5b1e8a
|
| 3 |
+
size 27644829
|
CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c85cdd823a4c7ea7c62af4607b425b6a303f414d992a22c067d6be58f2a231df
|
| 3 |
+
size 22430426
|
CoreNLP/models/ru/tagger/russian-ud-pos.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57eb9e303d97d9c115b879deaa0ffa6e8b2519c8713bb040e5bd5827c8ee64de
|
| 3 |
+
size 9018233
|
CoreNLP/stanford-postagger-full-2020-11-17/LICENSE.txt
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GNU GENERAL PUBLIC LICENSE
|
| 2 |
+
Version 2, June 1991
|
| 3 |
+
|
| 4 |
+
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
| 5 |
+
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 6 |
+
Everyone is permitted to copy and distribute verbatim copies
|
| 7 |
+
of this license document, but changing it is not allowed.
|
| 8 |
+
|
| 9 |
+
Preamble
|
| 10 |
+
|
| 11 |
+
The licenses for most software are designed to take away your
|
| 12 |
+
freedom to share and change it. By contrast, the GNU General Public
|
| 13 |
+
License is intended to guarantee your freedom to share and change free
|
| 14 |
+
software--to make sure the software is free for all its users. This
|
| 15 |
+
General Public License applies to most of the Free Software
|
| 16 |
+
Foundation's software and to any other program whose authors commit to
|
| 17 |
+
using it. (Some other Free Software Foundation software is covered by
|
| 18 |
+
the GNU Lesser General Public License instead.) You can apply it to
|
| 19 |
+
your programs, too.
|
| 20 |
+
|
| 21 |
+
When we speak of free software, we are referring to freedom, not
|
| 22 |
+
price. Our General Public Licenses are designed to make sure that you
|
| 23 |
+
have the freedom to distribute copies of free software (and charge for
|
| 24 |
+
this service if you wish), that you receive source code or can get it
|
| 25 |
+
if you want it, that you can change the software or use pieces of it
|
| 26 |
+
in new free programs; and that you know you can do these things.
|
| 27 |
+
|
| 28 |
+
To protect your rights, we need to make restrictions that forbid
|
| 29 |
+
anyone to deny you these rights or to ask you to surrender the rights.
|
| 30 |
+
These restrictions translate to certain responsibilities for you if you
|
| 31 |
+
distribute copies of the software, or if you modify it.
|
| 32 |
+
|
| 33 |
+
For example, if you distribute copies of such a program, whether
|
| 34 |
+
gratis or for a fee, you must give the recipients all the rights that
|
| 35 |
+
you have. You must make sure that they, too, receive or can get the
|
| 36 |
+
source code. And you must show them these terms so they know their
|
| 37 |
+
rights.
|
| 38 |
+
|
| 39 |
+
We protect your rights with two steps: (1) copyright the software, and
|
| 40 |
+
(2) offer you this license which gives you legal permission to copy,
|
| 41 |
+
distribute and/or modify the software.
|
| 42 |
+
|
| 43 |
+
Also, for each author's protection and ours, we want to make certain
|
| 44 |
+
that everyone understands that there is no warranty for this free
|
| 45 |
+
software. If the software is modified by someone else and passed on, we
|
| 46 |
+
want its recipients to know that what they have is not the original, so
|
| 47 |
+
that any problems introduced by others will not reflect on the original
|
| 48 |
+
authors' reputations.
|
| 49 |
+
|
| 50 |
+
Finally, any free program is threatened constantly by software
|
| 51 |
+
patents. We wish to avoid the danger that redistributors of a free
|
| 52 |
+
program will individually obtain patent licenses, in effect making the
|
| 53 |
+
program proprietary. To prevent this, we have made it clear that any
|
| 54 |
+
patent must be licensed for everyone's free use or not licensed at all.
|
| 55 |
+
|
| 56 |
+
The precise terms and conditions for copying, distribution and
|
| 57 |
+
modification follow.
|
| 58 |
+
|
| 59 |
+
GNU GENERAL PUBLIC LICENSE
|
| 60 |
+
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
| 61 |
+
|
| 62 |
+
0. This License applies to any program or other work which contains
|
| 63 |
+
a notice placed by the copyright holder saying it may be distributed
|
| 64 |
+
under the terms of this General Public License. The "Program", below,
|
| 65 |
+
refers to any such program or work, and a "work based on the Program"
|
| 66 |
+
means either the Program or any derivative work under copyright law:
|
| 67 |
+
that is to say, a work containing the Program or a portion of it,
|
| 68 |
+
either verbatim or with modifications and/or translated into another
|
| 69 |
+
language. (Hereinafter, translation is included without limitation in
|
| 70 |
+
the term "modification".) Each licensee is addressed as "you".
|
| 71 |
+
|
| 72 |
+
Activities other than copying, distribution and modification are not
|
| 73 |
+
covered by this License; they are outside its scope. The act of
|
| 74 |
+
running the Program is not restricted, and the output from the Program
|
| 75 |
+
is covered only if its contents constitute a work based on the
|
| 76 |
+
Program (independent of having been made by running the Program).
|
| 77 |
+
Whether that is true depends on what the Program does.
|
| 78 |
+
|
| 79 |
+
1. You may copy and distribute verbatim copies of the Program's
|
| 80 |
+
source code as you receive it, in any medium, provided that you
|
| 81 |
+
conspicuously and appropriately publish on each copy an appropriate
|
| 82 |
+
copyright notice and disclaimer of warranty; keep intact all the
|
| 83 |
+
notices that refer to this License and to the absence of any warranty;
|
| 84 |
+
and give any other recipients of the Program a copy of this License
|
| 85 |
+
along with the Program.
|
| 86 |
+
|
| 87 |
+
You may charge a fee for the physical act of transferring a copy, and
|
| 88 |
+
you may at your option offer warranty protection in exchange for a fee.
|
| 89 |
+
|
| 90 |
+
2. You may modify your copy or copies of the Program or any portion
|
| 91 |
+
of it, thus forming a work based on the Program, and copy and
|
| 92 |
+
distribute such modifications or work under the terms of Section 1
|
| 93 |
+
above, provided that you also meet all of these conditions:
|
| 94 |
+
|
| 95 |
+
a) You must cause the modified files to carry prominent notices
|
| 96 |
+
stating that you changed the files and the date of any change.
|
| 97 |
+
|
| 98 |
+
b) You must cause any work that you distribute or publish, that in
|
| 99 |
+
whole or in part contains or is derived from the Program or any
|
| 100 |
+
part thereof, to be licensed as a whole at no charge to all third
|
| 101 |
+
parties under the terms of this License.
|
| 102 |
+
|
| 103 |
+
c) If the modified program normally reads commands interactively
|
| 104 |
+
when run, you must cause it, when started running for such
|
| 105 |
+
interactive use in the most ordinary way, to print or display an
|
| 106 |
+
announcement including an appropriate copyright notice and a
|
| 107 |
+
notice that there is no warranty (or else, saying that you provide
|
| 108 |
+
a warranty) and that users may redistribute the program under
|
| 109 |
+
these conditions, and telling the user how to view a copy of this
|
| 110 |
+
License. (Exception: if the Program itself is interactive but
|
| 111 |
+
does not normally print such an announcement, your work based on
|
| 112 |
+
the Program is not required to print an announcement.)
|
| 113 |
+
|
| 114 |
+
These requirements apply to the modified work as a whole. If
|
| 115 |
+
identifiable sections of that work are not derived from the Program,
|
| 116 |
+
and can be reasonably considered independent and separate works in
|
| 117 |
+
themselves, then this License, and its terms, do not apply to those
|
| 118 |
+
sections when you distribute them as separate works. But when you
|
| 119 |
+
distribute the same sections as part of a whole which is a work based
|
| 120 |
+
on the Program, the distribution of the whole must be on the terms of
|
| 121 |
+
this License, whose permissions for other licensees extend to the
|
| 122 |
+
entire whole, and thus to each and every part regardless of who wrote it.
|
| 123 |
+
|
| 124 |
+
Thus, it is not the intent of this section to claim rights or contest
|
| 125 |
+
your rights to work written entirely by you; rather, the intent is to
|
| 126 |
+
exercise the right to control the distribution of derivative or
|
| 127 |
+
collective works based on the Program.
|
| 128 |
+
|
| 129 |
+
In addition, mere aggregation of another work not based on the Program
|
| 130 |
+
with the Program (or with a work based on the Program) on a volume of
|
| 131 |
+
a storage or distribution medium does not bring the other work under
|
| 132 |
+
the scope of this License.
|
| 133 |
+
|
| 134 |
+
3. You may copy and distribute the Program (or a work based on it,
|
| 135 |
+
under Section 2) in object code or executable form under the terms of
|
| 136 |
+
Sections 1 and 2 above provided that you also do one of the following:
|
| 137 |
+
|
| 138 |
+
a) Accompany it with the complete corresponding machine-readable
|
| 139 |
+
source code, which must be distributed under the terms of Sections
|
| 140 |
+
1 and 2 above on a medium customarily used for software interchange; or,
|
| 141 |
+
|
| 142 |
+
b) Accompany it with a written offer, valid for at least three
|
| 143 |
+
years, to give any third party, for a charge no more than your
|
| 144 |
+
cost of physically performing source distribution, a complete
|
| 145 |
+
machine-readable copy of the corresponding source code, to be
|
| 146 |
+
distributed under the terms of Sections 1 and 2 above on a medium
|
| 147 |
+
customarily used for software interchange; or,
|
| 148 |
+
|
| 149 |
+
c) Accompany it with the information you received as to the offer
|
| 150 |
+
to distribute corresponding source code. (This alternative is
|
| 151 |
+
allowed only for noncommercial distribution and only if you
|
| 152 |
+
received the program in object code or executable form with such
|
| 153 |
+
an offer, in accord with Subsection b above.)
|
| 154 |
+
|
| 155 |
+
The source code for a work means the preferred form of the work for
|
| 156 |
+
making modifications to it. For an executable work, complete source
|
| 157 |
+
code means all the source code for all modules it contains, plus any
|
| 158 |
+
associated interface definition files, plus the scripts used to
|
| 159 |
+
control compilation and installation of the executable. However, as a
|
| 160 |
+
special exception, the source code distributed need not include
|
| 161 |
+
anything that is normally distributed (in either source or binary
|
| 162 |
+
form) with the major components (compiler, kernel, and so on) of the
|
| 163 |
+
operating system on which the executable runs, unless that component
|
| 164 |
+
itself accompanies the executable.
|
| 165 |
+
|
| 166 |
+
If distribution of executable or object code is made by offering
|
| 167 |
+
access to copy from a designated place, then offering equivalent
|
| 168 |
+
access to copy the source code from the same place counts as
|
| 169 |
+
distribution of the source code, even though third parties are not
|
| 170 |
+
compelled to copy the source along with the object code.
|
| 171 |
+
|
| 172 |
+
4. You may not copy, modify, sublicense, or distribute the Program
|
| 173 |
+
except as expressly provided under this License. Any attempt
|
| 174 |
+
otherwise to copy, modify, sublicense or distribute the Program is
|
| 175 |
+
void, and will automatically terminate your rights under this License.
|
| 176 |
+
However, parties who have received copies, or rights, from you under
|
| 177 |
+
this License will not have their licenses terminated so long as such
|
| 178 |
+
parties remain in full compliance.
|
| 179 |
+
|
| 180 |
+
5. You are not required to accept this License, since you have not
|
| 181 |
+
signed it. However, nothing else grants you permission to modify or
|
| 182 |
+
distribute the Program or its derivative works. These actions are
|
| 183 |
+
prohibited by law if you do not accept this License. Therefore, by
|
| 184 |
+
modifying or distributing the Program (or any work based on the
|
| 185 |
+
Program), you indicate your acceptance of this License to do so, and
|
| 186 |
+
all its terms and conditions for copying, distributing or modifying
|
| 187 |
+
the Program or works based on it.
|
| 188 |
+
|
| 189 |
+
6. Each time you redistribute the Program (or any work based on the
|
| 190 |
+
Program), the recipient automatically receives a license from the
|
| 191 |
+
original licensor to copy, distribute or modify the Program subject to
|
| 192 |
+
these terms and conditions. You may not impose any further
|
| 193 |
+
restrictions on the recipients' exercise of the rights granted herein.
|
| 194 |
+
You are not responsible for enforcing compliance by third parties to
|
| 195 |
+
this License.
|
| 196 |
+
|
| 197 |
+
7. If, as a consequence of a court judgment or allegation of patent
|
| 198 |
+
infringement or for any other reason (not limited to patent issues),
|
| 199 |
+
conditions are imposed on you (whether by court order, agreement or
|
| 200 |
+
otherwise) that contradict the conditions of this License, they do not
|
| 201 |
+
excuse you from the conditions of this License. If you cannot
|
| 202 |
+
distribute so as to satisfy simultaneously your obligations under this
|
| 203 |
+
License and any other pertinent obligations, then as a consequence you
|
| 204 |
+
may not distribute the Program at all. For example, if a patent
|
| 205 |
+
license would not permit royalty-free redistribution of the Program by
|
| 206 |
+
all those who receive copies directly or indirectly through you, then
|
| 207 |
+
the only way you could satisfy both it and this License would be to
|
| 208 |
+
refrain entirely from distribution of the Program.
|
| 209 |
+
|
| 210 |
+
If any portion of this section is held invalid or unenforceable under
|
| 211 |
+
any particular circumstance, the balance of the section is intended to
|
| 212 |
+
apply and the section as a whole is intended to apply in other
|
| 213 |
+
circumstances.
|
| 214 |
+
|
| 215 |
+
It is not the purpose of this section to induce you to infringe any
|
| 216 |
+
patents or other property right claims or to contest validity of any
|
| 217 |
+
such claims; this section has the sole purpose of protecting the
|
| 218 |
+
integrity of the free software distribution system, which is
|
| 219 |
+
implemented by public license practices. Many people have made
|
| 220 |
+
generous contributions to the wide range of software distributed
|
| 221 |
+
through that system in reliance on consistent application of that
|
| 222 |
+
system; it is up to the author/donor to decide if he or she is willing
|
| 223 |
+
to distribute software through any other system and a licensee cannot
|
| 224 |
+
impose that choice.
|
| 225 |
+
|
| 226 |
+
This section is intended to make thoroughly clear what is believed to
|
| 227 |
+
be a consequence of the rest of this License.
|
| 228 |
+
|
| 229 |
+
8. If the distribution and/or use of the Program is restricted in
|
| 230 |
+
certain countries either by patents or by copyrighted interfaces, the
|
| 231 |
+
original copyright holder who places the Program under this License
|
| 232 |
+
may add an explicit geographical distribution limitation excluding
|
| 233 |
+
those countries, so that distribution is permitted only in or among
|
| 234 |
+
countries not thus excluded. In such case, this License incorporates
|
| 235 |
+
the limitation as if written in the body of this License.
|
| 236 |
+
|
| 237 |
+
9. The Free Software Foundation may publish revised and/or new versions
|
| 238 |
+
of the General Public License from time to time. Such new versions will
|
| 239 |
+
be similar in spirit to the present version, but may differ in detail to
|
| 240 |
+
address new problems or concerns.
|
| 241 |
+
|
| 242 |
+
Each version is given a distinguishing version number. If the Program
|
| 243 |
+
specifies a version number of this License which applies to it and "any
|
| 244 |
+
later version", you have the option of following the terms and conditions
|
| 245 |
+
either of that version or of any later version published by the Free
|
| 246 |
+
Software Foundation. If the Program does not specify a version number of
|
| 247 |
+
this License, you may choose any version ever published by the Free Software
|
| 248 |
+
Foundation.
|
| 249 |
+
|
| 250 |
+
10. If you wish to incorporate parts of the Program into other free
|
| 251 |
+
programs whose distribution conditions are different, write to the author
|
| 252 |
+
to ask for permission. For software which is copyrighted by the Free
|
| 253 |
+
Software Foundation, write to the Free Software Foundation; we sometimes
|
| 254 |
+
make exceptions for this. Our decision will be guided by the two goals
|
| 255 |
+
of preserving the free status of all derivatives of our free software and
|
| 256 |
+
of promoting the sharing and reuse of software generally.
|
| 257 |
+
|
| 258 |
+
NO WARRANTY
|
| 259 |
+
|
| 260 |
+
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
| 261 |
+
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
| 262 |
+
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
| 263 |
+
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
| 264 |
+
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
| 265 |
+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
| 266 |
+
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
| 267 |
+
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
| 268 |
+
REPAIR OR CORRECTION.
|
| 269 |
+
|
| 270 |
+
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
| 271 |
+
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
| 272 |
+
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
| 273 |
+
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
| 274 |
+
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
| 275 |
+
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
| 276 |
+
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
| 277 |
+
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
| 278 |
+
POSSIBILITY OF SUCH DAMAGES.
|
| 279 |
+
|
| 280 |
+
END OF TERMS AND CONDITIONS
|
| 281 |
+
|
| 282 |
+
How to Apply These Terms to Your New Programs
|
| 283 |
+
|
| 284 |
+
If you develop a new program, and you want it to be of the greatest
|
| 285 |
+
possible use to the public, the best way to achieve this is to make it
|
| 286 |
+
free software which everyone can redistribute and change under these terms.
|
| 287 |
+
|
| 288 |
+
To do so, attach the following notices to the program. It is safest
|
| 289 |
+
to attach them to the start of each source file to most effectively
|
| 290 |
+
convey the exclusion of warranty; and each file should have at least
|
| 291 |
+
the "copyright" line and a pointer to where the full notice is found.
|
| 292 |
+
|
| 293 |
+
<one line to give the program's name and a brief idea of what it does.>
|
| 294 |
+
Copyright (C) <year> <name of author>
|
| 295 |
+
|
| 296 |
+
This program is free software; you can redistribute it and/or modify
|
| 297 |
+
it under the terms of the GNU General Public License as published by
|
| 298 |
+
the Free Software Foundation; either version 2 of the License, or
|
| 299 |
+
(at your option) any later version.
|
| 300 |
+
|
| 301 |
+
This program is distributed in the hope that it will be useful,
|
| 302 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 303 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 304 |
+
GNU General Public License for more details.
|
| 305 |
+
|
| 306 |
+
You should have received a copy of the GNU General Public License along
|
| 307 |
+
with this program; if not, write to the Free Software Foundation, Inc.,
|
| 308 |
+
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
| 309 |
+
|
| 310 |
+
Also add information on how to contact you by electronic and paper mail.
|
| 311 |
+
|
| 312 |
+
If the program is interactive, make it output a short notice like this
|
| 313 |
+
when it starts in an interactive mode:
|
| 314 |
+
|
| 315 |
+
Gnomovision version 69, Copyright (C) year name of author
|
| 316 |
+
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
| 317 |
+
This is free software, and you are welcome to redistribute it
|
| 318 |
+
under certain conditions; type `show c' for details.
|
| 319 |
+
|
| 320 |
+
The hypothetical commands `show w' and `show c' should show the appropriate
|
| 321 |
+
parts of the General Public License. Of course, the commands you use may
|
| 322 |
+
be called something other than `show w' and `show c'; they could even be
|
| 323 |
+
mouse-clicks or menu items--whatever suits your program.
|
| 324 |
+
|
| 325 |
+
You should also get your employer (if you work as a programmer) or your
|
| 326 |
+
school, if any, to sign a "copyright disclaimer" for the program, if
|
| 327 |
+
necessary. Here is a sample; alter the names:
|
| 328 |
+
|
| 329 |
+
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
| 330 |
+
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
| 331 |
+
|
| 332 |
+
<signature of Ty Coon>, 1 April 1989
|
| 333 |
+
Ty Coon, President of Vice
|
| 334 |
+
|
| 335 |
+
This General Public License does not permit incorporating your program into
|
| 336 |
+
proprietary programs. If your program is a subroutine library, you may
|
| 337 |
+
consider it more useful to permit linking proprietary applications with the
|
| 338 |
+
library. If this is what you want to do, use the GNU Lesser General
|
| 339 |
+
Public License instead of this License.
|
CoreNLP/stanford-postagger-full-2020-11-17/README.txt
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Stanford POS Tagger, v4.2.0 - 2020-11-17
|
| 2 |
+
Copyright (c) 2002-2020 The Board of Trustees of
|
| 3 |
+
The Leland Stanford Junior University. All Rights Reserved.
|
| 4 |
+
|
| 5 |
+
Original tagger author: Kristina Toutanova
|
| 6 |
+
Code contributions: Christopher Manning, Dan Klein, William Morgan,
|
| 7 |
+
Huihsin Tseng, Anna Rafferty, John Bauer
|
| 8 |
+
Major rewrite for version 2.0 by Michel Galley.
|
| 9 |
+
Current release prepared by: Jason Bolton
|
| 10 |
+
|
| 11 |
+
This package contains a Maximum Entropy part of speech tagger.
|
| 12 |
+
|
| 13 |
+
A Part-Of-Speech Tagger (POS Tagger) is a piece of software that reads
|
| 14 |
+
text in some language and assigns parts of speech to each word (and
|
| 15 |
+
other tokens), such as noun, verb, adjective, etc. Generally
|
| 16 |
+
computational applications use more fine-grained POS tags like
|
| 17 |
+
'noun-plural'. This software is a Java implementation of the log-linear
|
| 18 |
+
part-of-speech (POS) taggers described in:
|
| 19 |
+
|
| 20 |
+
Kristina Toutanova and Christopher D. Manning. 2000. Enriching the
|
| 21 |
+
Knowledge Sources Used in a Maximum Entropy Part-of-Speech
|
| 22 |
+
Tagger. Proceedings of the Joint SIGDAT Conference on Empirical Methods
|
| 23 |
+
in Natural Language Processing and Very Large Corpora (EMNLP/VLC-2000),
|
| 24 |
+
Hong Kong.
|
| 25 |
+
|
| 26 |
+
Kristina Toutanova, Dan Klein, Christopher Manning, and Yoram
|
| 27 |
+
Singer. 2003. Feature-Rich Part-of-Speech Tagging with a Cyclic
|
| 28 |
+
Dependency Network. In Proceedings of HLT-NAACL 2003 pages 252-259.
|
| 29 |
+
|
| 30 |
+
The system requires Java 1.8+ to be installed. About 60 MB of memory is
|
| 31 |
+
required to run a trained tagger, depending on the OS, tagging model
|
| 32 |
+
chosen, etc. (i.e., you may need to give java an option like java
|
| 33 |
+
-mx120m). Plenty of memory is needed to train a tagger. It depends on
|
| 34 |
+
the complexity of the model but at least 1GB is recommended (java
|
| 35 |
+
-mx1g). Two trained tagger models for English are included with the
|
| 36 |
+
tagger, along with some caseless versions, and we provide models for
|
| 37 |
+
some other languages. The tagger can be retrained on other languages
|
| 38 |
+
based on POS-annotated training text.
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
QUICKSTART
|
| 43 |
+
-----------------------------------------------
|
| 44 |
+
|
| 45 |
+
The Stanford POS Tagger is designed to be used from the command line or
|
| 46 |
+
programmatically via its API.
|
| 47 |
+
|
| 48 |
+
There is a GUI interface, but it is for
|
| 49 |
+
demonstration purposes only; most features of the tagger can only be
|
| 50 |
+
accessed via the command line. To run the demonstration GUI you should
|
| 51 |
+
be able to use either of the following two methods:
|
| 52 |
+
|
| 53 |
+
1)
|
| 54 |
+
java -mx200m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI models/wsj-0-18-left3words-distsim.tagger
|
| 55 |
+
|
| 56 |
+
2) Running the appropriate script for your operating system:
|
| 57 |
+
stanford-postagger-gui.bat
|
| 58 |
+
./stanford-postagger-gui.sh
|
| 59 |
+
|
| 60 |
+
To run the tagger from the command line, you can start with the provided
|
| 61 |
+
script appropriate for your operating system:
|
| 62 |
+
./stanford-postagger.sh models/wsj-0-18-left3words-distsim.tagger sample-input.txt
|
| 63 |
+
stanford-postagger models\wsj-0-18-left3words-distsim.tagger sample-input.txt
|
| 64 |
+
The output should match what is found in sample-output.txt
|
| 65 |
+
|
| 66 |
+
The tagger has three modes: tagging, training, and testing. Tagging
|
| 67 |
+
allows you to use a pretrained model (two English models are included)
|
| 68 |
+
to assign part of speech tags to unlabeled text. Training allows you to
|
| 69 |
+
save a new model based on a set of tagged data that you provide.
|
| 70 |
+
Testing allows you to see how well a tagger performs by tagging labeled
|
| 71 |
+
data and evaluating the results against the correct tags.
|
| 72 |
+
|
| 73 |
+
Many options are available for training, tagging, and testing. These
|
| 74 |
+
options can be set using a properties file. To start, you can generate a
|
| 75 |
+
default properties file by:
|
| 76 |
+
|
| 77 |
+
java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -genprops > myPropsFile.prop
|
| 78 |
+
|
| 79 |
+
This will create the file myPropsFile.prop with descriptions of each
|
| 80 |
+
option for the tagger and the default values for these options
|
| 81 |
+
specified. Any properties you can specify in a properties file can be
|
| 82 |
+
specified on the command line or vice versa. For further information,
|
| 83 |
+
please consult the Javadocs (start with the entry for MaxentTagger,
|
| 84 |
+
which includes a table of all options which may be set to configure the
|
| 85 |
+
tagger and descriptions of those options).
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
To tag a file using the pre-trained bidirectional model
|
| 89 |
+
=======================================================
|
| 90 |
+
|
| 91 |
+
java -mx300m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -model models/english-bidirectional-distsim.tagger -textFile sample-input.txt > sample-tagged.txt
|
| 92 |
+
|
| 93 |
+
Tagged output will be printed to standard out, which you can redirect
|
| 94 |
+
as above. Note that the bidirectional model is slightly more accurate
|
| 95 |
+
but significantly slower than the left3words model.
|
| 96 |
+
|
| 97 |
+
To train a simple model
|
| 98 |
+
=======================
|
| 99 |
+
|
| 100 |
+
java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -prop propertiesFile -model modelFile -trainFile trainingFile
|
| 101 |
+
|
| 102 |
+
To test a model
|
| 103 |
+
===============
|
| 104 |
+
|
| 105 |
+
java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -prop propertiesFile -model modelFile -testFile testFile
|
| 106 |
+
|
| 107 |
+
Using models for French, German, and Spanish
|
| 108 |
+
===========================================
|
| 109 |
+
|
| 110 |
+
Starting with version 4.0.0, French, German, and Spanish are tokenized according to the UD 2.0 standard. This includes creating
|
| 111 |
+
multiword tokens. This functionality requires the pipeline functionality only available in the full Stanford CoreNLP distribution.
|
| 112 |
+
To tag French, German, or Spanish, one must provide UD 2.0 tokenized text, or upgrade to the full Stanford CoreNLP package to get
|
| 113 |
+
UD 2.0 tokenization for these languages.
|
| 114 |
+
|
| 115 |
+
To run on pretokenized text, add "-tokenize false" to your command.
|
| 116 |
+
|
| 117 |
+
Example:
|
| 118 |
+
|
| 119 |
+
java -mx300m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -model models/french-ud.tagger -tokenize false -textFile sample-input.txt > sample-tagged.txt
|
| 120 |
+
|
| 121 |
+
CONTENTS
|
| 122 |
+
-----------------------------------------------
|
| 123 |
+
README.txt
|
| 124 |
+
|
| 125 |
+
This file.
|
| 126 |
+
|
| 127 |
+
LICENSE.txt
|
| 128 |
+
|
| 129 |
+
Stanford POS Tagger is licensed under the GNU General Public License (v2+).
|
| 130 |
+
|
| 131 |
+
stanford-postagger.jar
|
| 132 |
+
stanford-postagger-YYYY-MM-DD.jar
|
| 133 |
+
|
| 134 |
+
This is a JAR file containing all the Stanford classes necessary to
|
| 135 |
+
run the Stanford POS Tagger. The two jar files are identical. You can use
|
| 136 |
+
either the one with a version (date) indication or without, as you prefer.
|
| 137 |
+
|
| 138 |
+
src
|
| 139 |
+
|
| 140 |
+
A directory containing the Java 1.8 source code for the Stanford POS
|
| 141 |
+
Tagger distribution.
|
| 142 |
+
|
| 143 |
+
build.xml, Makefile
|
| 144 |
+
|
| 145 |
+
Files for building the distribution (with ant and make, respectively)
|
| 146 |
+
|
| 147 |
+
models
|
| 148 |
+
|
| 149 |
+
A directory containing trained POS taggers; the taggers end in ".tagger"
|
| 150 |
+
and the props files used to make the taggers end in ".props". The
|
| 151 |
+
".props" files cannot be directly used on your own machine as they use
|
| 152 |
+
paths on the Stanford NLP machines, but they may serve as examples for
|
| 153 |
+
your own properties files. Included in the full version are other
|
| 154 |
+
English taggers, a German tagger, an Arabic tagger, and a Chinese
|
| 155 |
+
tagger. If you chose to download the smaller version of the tagger,
|
| 156 |
+
you have only two English taggers (left3words is faster but slightly
|
| 157 |
+
less accurate than bidirectional-distsim) - feel free to download any
|
| 158 |
+
other taggers you need from the POS tagger website. More information
|
| 159 |
+
about the models can be found in the README-Models.txt file in this
|
| 160 |
+
directory.
|
| 161 |
+
|
| 162 |
+
sample-input.txt
|
| 163 |
+
|
| 164 |
+
A sample text file that you can tag to demonstrate the tagger.
|
| 165 |
+
|
| 166 |
+
sample-output.txt
|
| 167 |
+
|
| 168 |
+
Tagged output of the tagger (using the left3words model)
|
| 169 |
+
|
| 170 |
+
stanford-postagger-gui.sh
|
| 171 |
+
stanford-postagger-gui.bat
|
| 172 |
+
|
| 173 |
+
Scripts for invoking the GUI demonstration version of the tagger.
|
| 174 |
+
|
| 175 |
+
stanford-postagger.sh
|
| 176 |
+
stanford-postagger.bat
|
| 177 |
+
|
| 178 |
+
Scripts for running the command-line version of the tagger.
|
| 179 |
+
|
| 180 |
+
javadoc
|
| 181 |
+
|
| 182 |
+
Javadocs for the distribution. In particular, look at the javadocs
|
| 183 |
+
for the class edu.stanford.nlp.tagger.maxent.MaxentTagger.
|
| 184 |
+
|
| 185 |
+
TaggerDemo.java
|
| 186 |
+
|
| 187 |
+
A sample file for how to call the tagger in your own program. You
|
| 188 |
+
should be able to compile and run it with:
|
| 189 |
+
|
| 190 |
+
javac -cp stanford-postagger.jar TaggerDemo.java
|
| 191 |
+
java -cp ".:stanford-postagger.jar" TaggerDemo models/wsj-0-18-left3words-distsim.tagger sample-input.txt
|
| 192 |
+
|
| 193 |
+
(If you are on Windows, you need to replace the ":" with a ";" in the
|
| 194 |
+
-cp argument, and should use a "\" in place of the "/" in the filename.)
|
| 195 |
+
|
| 196 |
+
THANKS
|
| 197 |
+
-----------------------------------------------
|
| 198 |
+
|
| 199 |
+
Thanks to the members of the Stanford Natural Language Processing Lab
|
| 200 |
+
for great collaborative work on Java libraries for natural language
|
| 201 |
+
processing.
|
| 202 |
+
|
| 203 |
+
http://nlp.stanford.edu/javanlp/
|
| 204 |
+
|
| 205 |
+
CHANGES
|
| 206 |
+
-----------------------------------------------
|
| 207 |
+
|
| 208 |
+
2020-11-17 4.2.0 Add currency data for English models.
|
| 209 |
+
|
| 210 |
+
2020-08-06 4.1.0 Add missing extractor, Spanish tokenization
|
| 211 |
+
upgrades
|
| 212 |
+
|
| 213 |
+
2020-05-22 4.0.0 Model tokenization updated to UDv2.0
|
| 214 |
+
|
| 215 |
+
2018-10-16 3.9.2 New English models, better currency symbol
|
| 216 |
+
handling
|
| 217 |
+
|
| 218 |
+
2018-02-27 3.9.1 new French UD model
|
| 219 |
+
|
| 220 |
+
2017-06-09 3.8.0 new Spanish and French UD models
|
| 221 |
+
|
| 222 |
+
2016-10-31 3.7.0 Update for compatibility, German UD model
|
| 223 |
+
|
| 224 |
+
2015-12-09 3.6.0 Updated for compatibility
|
| 225 |
+
|
| 226 |
+
2015-04-20 3.5.2 Update for compatibility
|
| 227 |
+
|
| 228 |
+
2015-01-29 3.5.1 General bugfixes
|
| 229 |
+
|
| 230 |
+
2014-10-26 3.5.0 Upgrade to Java 1.8
|
| 231 |
+
|
| 232 |
+
2014-08-27 3.4.1 Add Spanish models
|
| 233 |
+
|
| 234 |
+
2014-06-16 3.4 Using CC tagset for French
|
| 235 |
+
|
| 236 |
+
2014-01-04 3.3.1 Bugfix release
|
| 237 |
+
|
| 238 |
+
2013-11-12 3.3.0 Add imperatives to English training data
|
| 239 |
+
|
| 240 |
+
2013-06-19 3.2.0 Decrease size and improve speed of tagger
|
| 241 |
+
models for all languages
|
| 242 |
+
|
| 243 |
+
2013-04-04 3.1.5 Speed improvements, ctb7 model, -nthreads
|
| 244 |
+
option
|
| 245 |
+
|
| 246 |
+
2012-11-11 3.1.4 Updated Chinese model
|
| 247 |
+
|
| 248 |
+
2012-07-09 3.1.3 Minor bug fixes
|
| 249 |
+
|
| 250 |
+
2012-05-22 3.1.2 Updated for compatibility with other releases
|
| 251 |
+
|
| 252 |
+
2012-03-09 3.1.1 Caseless models added
|
| 253 |
+
|
| 254 |
+
2012-01-06 3.1.0 French tagger added, tagging speed improved
|
| 255 |
+
|
| 256 |
+
2011-09-14 3.0.4 Updated for compatibility with other releases
|
| 257 |
+
|
| 258 |
+
2011-06-15 3.0.3 Updated for compatibility with other releases
|
| 259 |
+
|
| 260 |
+
2011-05-15 3.0.2 Can read training files in TSV format
|
| 261 |
+
|
| 262 |
+
2011-04-17 3.0.1 Improved German and Arabic models
|
| 263 |
+
Compatible with other Stanford releases
|
| 264 |
+
|
| 265 |
+
2010-05-21 3.0.0 Re-entrant
|
| 266 |
+
|
| 267 |
+
LICENSE
|
| 268 |
+
-----------------------------------------------
|
| 269 |
+
|
| 270 |
+
Stanford POS Tagger
|
| 271 |
+
Copyright (c) 2002-2010 The Board of Trustees of
|
| 272 |
+
The Leland Stanford Junior University. All Rights Reserved.
|
| 273 |
+
|
| 274 |
+
This program is free software; you can redistribute it and/or
|
| 275 |
+
modify it under the terms of the GNU General Public License
|
| 276 |
+
as published by the Free Software Foundation; either version 2
|
| 277 |
+
of the License, or (at your option) any later version.
|
| 278 |
+
|
| 279 |
+
This program is distributed in the hope that it will be useful,
|
| 280 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 281 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 282 |
+
GNU General Public License for more details.
|
| 283 |
+
|
| 284 |
+
You should have received a copy of the GNU General Public License
|
| 285 |
+
along with this program. If not, see http://www.gnu.org/licenses/ .
|
| 286 |
+
|
| 287 |
+
For more information, bug reports, fixes, contact:
|
| 288 |
+
Christopher Manning
|
| 289 |
+
Dept of Computer Science, Gates 2A
|
| 290 |
+
Stanford CA 94305-9020
|
| 291 |
+
USA
|
| 292 |
+
Support/Questions: [email protected]
|
| 293 |
+
Licensing: [email protected]
|
| 294 |
+
http://nlp.stanford.edu/software/tagger.html
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
CONTACT
|
| 298 |
+
-----------------------------------------------
|
| 299 |
+
|
| 300 |
+
For questions about the Stanford POS tagger, please feel free to contact
|
| 301 |
+
the Stanford JavaNLP user community at the mailing list
|
| 302 |
+
[email protected]. You need to be a member of this
|
| 303 |
+
mailing list to be able to post to it. Join the list either by emailing
|
| 304 |
+
[email protected] (leave the subject and message
|
| 305 |
+
body empty) or by using the web interface at:
|
| 306 |
+
|
| 307 |
+
https://mailman.stanford.edu/mailman/listinfo/java-nlp-user
|
| 308 |
+
|
| 309 |
+
This is the best list to post to in order to ask questions, make
|
| 310 |
+
announcements, or for discussion among Stanford JavaNLP tool users. We
|
| 311 |
+
provide assistance on a best-effort basis. You can also look at the list
|
| 312 |
+
archives via https://mailman.stanford.edu/pipermail/java-nlp-user/. For
|
| 313 |
+
licensing questions, please see the tagger webpage or contact Stanford
|
| 314 |
+
JavaNLP at [email protected].
|
| 315 |
+
|
CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo.java
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import edu.stanford.nlp.util.logging.Redwood;
|
| 2 |
+
|
| 3 |
+
import java.io.BufferedReader;
|
| 4 |
+
import java.io.FileReader;
|
| 5 |
+
import java.util.List;
|
| 6 |
+
|
| 7 |
+
import edu.stanford.nlp.ling.SentenceUtils;
|
| 8 |
+
import edu.stanford.nlp.ling.TaggedWord;
|
| 9 |
+
import edu.stanford.nlp.ling.HasWord;
|
| 10 |
+
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
|
| 11 |
+
|
| 12 |
+
public class TaggerDemo { // Command-line demo: loads a MaxentTagger model, tags every sentence of a text file, and prints each tagged sentence to standard output.
|
| 13 |
+
|
| 14 |
+
/** Redwood logging channels for this class; used below to emit the usage message. */
|
| 15 |
+
private static Redwood.RedwoodChannels log = Redwood.channels(TaggerDemo.class);
|
| 16 |
+
|
| 17 |
+
private TaggerDemo() {} // private constructor: the class is only used through its static main method
|
| 18 |
+
|
| 19 |
+
public static void main(String[] args) throws Exception { // args[0] = tagger model file, args[1] = text file to tag
|
| 20 |
+
if (args.length != 2) { // exactly two arguments are required
|
| 21 |
+
log.info("usage: java TaggerDemo modelFile fileToTag");
|
| 22 |
+
return; // nothing is tagged when the argument count is wrong
|
| 23 |
+
}
|
| 24 |
+
MaxentTagger tagger = new MaxentTagger(args[0]); // build the tagger from the model path given as the first argument
|
| 25 |
+
List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(args[1]))); // split the input file into tokenized sentences. NOTE(review): FileReader decodes with the platform default charset and the reader is not closed in this method -- confirm whether tokenizeText closes it
|
| 26 |
+
for (List<HasWord> sentence : sentences) { // tag and print one sentence per iteration
|
| 27 |
+
List<TaggedWord> tSentence = tagger.tagSentence(sentence);
|
| 28 |
+
System.out.println(SentenceUtils.listToString(tSentence, false)); // NOTE(review): the `false` flag presumably keeps the tag alongside each word in the output -- confirm against SentenceUtils.listToString
|
| 29 |
+
}
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
}
|
CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo2.java
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import edu.stanford.nlp.util.logging.Redwood;
|
| 2 |
+
|
| 3 |
+
import java.io.BufferedReader;
|
| 4 |
+
import java.io.FileInputStream;
|
| 5 |
+
import java.io.InputStreamReader;
|
| 6 |
+
import java.io.OutputStreamWriter;
|
| 7 |
+
import java.io.PrintWriter;
|
| 8 |
+
import java.util.List;
|
| 9 |
+
|
| 10 |
+
import edu.stanford.nlp.ling.SentenceUtils;
|
| 11 |
+
import edu.stanford.nlp.ling.TaggedWord;
|
| 12 |
+
import edu.stanford.nlp.ling.HasWord;
|
| 13 |
+
import edu.stanford.nlp.ling.CoreLabel;
|
| 14 |
+
import edu.stanford.nlp.process.CoreLabelTokenFactory;
|
| 15 |
+
import edu.stanford.nlp.process.DocumentPreprocessor;
|
| 16 |
+
import edu.stanford.nlp.process.PTBTokenizer;
|
| 17 |
+
import edu.stanford.nlp.process.TokenizerFactory;
|
| 18 |
+
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
|
| 19 |
+
|
| 20 |
+
/** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
|
| 21 |
+
* being tagged by the tagger. The sentences are generated by direct use
|
| 22 |
+
* of the DocumentPreprocessor class.
|
| 23 |
+
*
|
| 24 |
+
* @author Christopher Manning
|
| 25 |
+
*/
|
| 26 |
+
public class TaggerDemo2 { // Command-line demo: tags sentences produced by a DocumentPreprocessor, then tags one hand-built sentence and prints its "JJ"-tagged words.
|
| 27 |
+
|
| 28 |
+
/** Redwood logging channels for this class; used below to emit the usage message. */
|
| 29 |
+
private static Redwood.RedwoodChannels log = Redwood.channels(TaggerDemo2.class);
|
| 30 |
+
|
| 31 |
+
private TaggerDemo2() {} // private constructor: the class is only used through its static main method
|
| 32 |
+
|
| 33 |
+
public static void main(String[] args) throws Exception { // args[0] = tagger model file, args[1] = text file to tag
|
| 34 |
+
if (args.length != 2) { // exactly two arguments are required
|
| 35 |
+
log.info("usage: java TaggerDemo2 modelFile fileToTag");
|
| 36 |
+
return; // nothing is tagged when the argument count is wrong
|
| 37 |
+
}
|
| 38 |
+
MaxentTagger tagger = new MaxentTagger(args[0]); // build the tagger from the model path given as the first argument
|
| 39 |
+
TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), // tokenizer factory that produces CoreLabel tokens
|
| 40 |
+
"untokenizable=noneKeep"); // tokenizer option string. NOTE(review): presumably controls how untokenizable characters are handled -- confirm against the PTBTokenizer documentation
|
| 41 |
+
BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8")); // input file decoded explicitly as UTF-8. NOTE(review): r is not closed anywhere in this method
|
| 42 |
+
PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8")); // standard output wrapped so that text is encoded as UTF-8
|
| 43 |
+
DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r); // iterating over this yields one List<HasWord> per sentence (see loop below)
|
| 44 |
+
documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
|
| 45 |
+
for (List<HasWord> sentence : documentPreprocessor) { // tag and print one sentence per iteration
|
| 46 |
+
List<TaggedWord> tSentence = tagger.tagSentence(sentence);
|
| 47 |
+
pw.println(SentenceUtils.listToString(tSentence, false)); // NOTE(review): the `false` flag presumably keeps the tag alongside each word in the output -- confirm against SentenceUtils.listToString
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
// print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
|
| 51 |
+
List<HasWord> sent = SentenceUtils.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
|
| 52 |
+
List<TaggedWord> taggedSent = tagger.tagSentence(sent);
|
| 53 |
+
for (TaggedWord tw : taggedSent) {
|
| 54 |
+
if (tw.tag().startsWith("JJ")) { // tags beginning with "JJ" are the ones treated as adjectives here
|
| 55 |
+
pw.println(tw.word()); // print only the word, without its tag
|
| 56 |
+
}
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
pw.close(); // closes the writer wrapping System.out, flushing any buffered output
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
}
|
CoreNLP/stanford-postagger-full-2020-11-17/build.xml
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- build.xml file for ant for JavaNLP -->
|
| 2 |
+
|
| 3 |
+
<!-- Before using this, unjar the sources' jar file into the src/ directory! -->
|
| 4 |
+
|
| 5 |
+
<!-- A "project" describes a set of targets that may be requested
|
| 6 |
+
when Ant is executed. The "default" attribute defines the
|
| 7 |
+
target which is executed if no specific target is requested,
|
| 8 |
+
and the "basedir" attribute defines the current working directory
|
| 9 |
+
from which Ant executes the requested task. This is normally
|
| 10 |
+
set to the current working directory.
|
| 11 |
+
-->
|
| 12 |
+
|
| 13 |
+
<project name="JavaNLP" default="compile" basedir=".">
|
| 14 |
+
|
| 15 |
+
<property name="build.home" value="${basedir}/classes"/>
|
| 16 |
+
<property name="build.tests" value="${basedir}/classes"/>
|
| 17 |
+
<property name="docs.home" value="${basedir}/docs"/>
|
| 18 |
+
<property name="src.home" value="${basedir}/src"/>
|
| 19 |
+
<property name="javadoc.home" value="${basedir}/javadoc"/>
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
<!-- ==================== Compilation Control Options ==================== -->
|
| 23 |
+
|
| 24 |
+
<!--
|
| 25 |
+
|
| 26 |
+
These properties control option settings on the Javac compiler when it
|
| 27 |
+
is invoked using the <javac> task.
|
| 28 |
+
|
| 29 |
+
compile.debug Should compilation include the debug option?
|
| 30 |
+
|
| 31 |
+
compile.deprecation Should compilation include the deprecation option?
|
| 32 |
+
|
| 33 |
+
compile.optimize Should compilation include the optimize option?
|
| 34 |
+
|
| 35 |
+
compile.source Source version compatibility
|
| 36 |
+
|
| 37 |
+
compile.target Target class version compatibility
|
| 38 |
+
|
| 39 |
+
-->
|
| 40 |
+
|
| 41 |
+
<property name="compile.debug" value="true"/>
|
| 42 |
+
<property name="compile.deprecation" value="false"/>
|
| 43 |
+
<property name="compile.optimize" value="true"/>
|
| 44 |
+
<property name="compile.source" value="1.8" />
|
| 45 |
+
<property name="compile.target" value="1.8" />
|
| 46 |
+
<property name="compile.encoding" value="utf-8" />
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
<!-- ==================== All Target ====================================== -->
|
| 52 |
+
|
| 53 |
+
<!--
|
| 54 |
+
|
| 55 |
+
The "all" target is a shortcut for running the "clean" target followed
|
| 56 |
+
by the "compile" target, to force a complete recompile.
|
| 57 |
+
|
| 58 |
+
-->
|
| 59 |
+
|
| 60 |
+
<target name="all" depends="clean,compile"
|
| 61 |
+
description="Clean build and dist directories, then compile"/>
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
<!-- ==================== Clean Target ==================================== -->
|
| 66 |
+
|
| 67 |
+
<!--
|
| 68 |
+
|
| 69 |
+
The "clean" target deletes the previously compiled classes under "${build.home}/edu",
|
| 70 |
+
so that you can be sure the application can be built from scratch.
|
| 71 |
+
|
| 72 |
+
-->
|
| 73 |
+
|
| 74 |
+
<target name="clean" description="Delete old classes">
|
| 75 |
+
<delete dir="${build.home}/edu"/>
|
| 76 |
+
</target>
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
<!-- ==================== Compile Target ================================== -->
|
| 81 |
+
|
| 82 |
+
<!--
|
| 83 |
+
|
| 84 |
+
The "compile" target transforms source files (from your "src" directory)
|
| 85 |
+
into object files in the appropriate location in the build directory.
|
| 86 |
+
This example assumes that you will be including your classes in an
|
| 87 |
+
unpacked directory hierarchy under "/WEB-INF/classes".
|
| 88 |
+
|
| 89 |
+
-->
|
| 90 |
+
|
| 91 |
+
<target name="compile" depends="prepare"
|
| 92 |
+
description="Compile Java sources">
|
| 93 |
+
|
| 94 |
+
<!-- Compile Java classes as necessary -->
|
| 95 |
+
<mkdir dir="${build.home}"/>
|
| 96 |
+
<javac srcdir="${src.home}"
|
| 97 |
+
destdir="${build.home}"
|
| 98 |
+
debug="${compile.debug}"
|
| 99 |
+
encoding="${compile.encoding}"
|
| 100 |
+
deprecation="${compile.deprecation}"
|
| 101 |
+
optimize="${compile.optimize}"
|
| 102 |
+
source="${compile.source}"
|
| 103 |
+
target="${compile.target}"
|
| 104 |
+
includeantruntime="false">
|
| 105 |
+
<compilerarg value="-Xmaxerrs"/>
|
| 106 |
+
<compilerarg value="20"/>
|
| 107 |
+
<!-- <compilerarg value="-Xlint"/> -->
|
| 108 |
+
</javac>
|
| 109 |
+
|
| 110 |
+
<!-- Copy application resources -->
|
| 111 |
+
<!--
|
| 112 |
+
<copy todir="${build.home}/WEB-INF/classes">
|
| 113 |
+
<fileset dir="${src.home}" excludes="**/*.java"/>
|
| 114 |
+
</copy>
|
| 115 |
+
-->
|
| 116 |
+
|
| 117 |
+
</target>
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
<!-- ==================== Jar Target ================================== -->
|
| 121 |
+
|
| 122 |
+
<!--
|
| 123 |
+
The "jar" target recreates the jar file, which you may want to do if
|
| 124 |
+
you take apart the source jar and change something.
|
| 125 |
+
-->
|
| 126 |
+
|
| 127 |
+
<property name="jar.output" value="stanford-postagger.jar" />
|
| 128 |
+
<property name="jar.mainclass" value="edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI" />
|
| 129 |
+
|
| 130 |
+
<target name="jar" depends="compile" description="Build a jar file">
|
| 131 |
+
<jar destfile="${jar.output}">
|
| 132 |
+
<fileset dir="${build.home}"/>
|
| 133 |
+
<manifest>
|
| 134 |
+
<attribute name="Main-Class" value="${jar.mainclass}"/>
|
| 135 |
+
</manifest>
|
| 136 |
+
</jar>
|
| 137 |
+
</target>
|
| 138 |
+
|
| 139 |
+
<!-- ==================== Javadoc Target ================================== -->
|
| 140 |
+
|
| 141 |
+
<!--
|
| 142 |
+
|
| 143 |
+
The "javadoc" target creates Javadoc API documentation for the Java
|
| 144 |
+
classes included in your application. Normally, this is only required
|
| 145 |
+
when preparing a distribution release, but is available as a separate
|
| 146 |
+
target in case the developer wants to create Javadocs independently.
|
| 147 |
+
|
| 148 |
+
-->
|
| 149 |
+
|
| 150 |
+
<target name="javadoc" depends="compile"
|
| 151 |
+
description="Create Javadoc API documentation">
|
| 152 |
+
|
| 153 |
+
<mkdir dir="${javadoc.home}"/>
|
| 154 |
+
<javadoc sourcepath="${src.home}"
|
| 155 |
+
destdir="${javadoc.home}"
|
| 156 |
+
maxmemory="1g"
|
| 157 |
+
author="true"
|
| 158 |
+
source="${compile.source}"
|
| 159 |
+
overview="${src.home}/edu/stanford/nlp/overview.html"
|
| 160 |
+
doctitle="Stanford JavaNLP API Documentation"
|
| 161 |
+
windowtitle="Stanford JavaNLP API"
|
| 162 |
+
encoding="${compile.encoding}"
|
| 163 |
+
docencoding="${compile.encoding}"
|
| 164 |
+
charset="${compile.encoding}"
|
| 165 |
+
packagenames="*">
|
| 166 |
+
<!-- Allow @generated, @modifiable and @ordered tags -->
|
| 167 |
+
<tag name="generated" scope="all" description="Generated" />
|
| 168 |
+
<tag name="modifiable" scope="all" description="Modifiable" />
|
| 169 |
+
<tag name="ordered" scope="all" description="Ordered" />
|
| 170 |
+
<!-- Depends on lib and classes folders -->
|
| 171 |
+
<classpath>
|
| 172 |
+
<pathelement path="${build.home}" />
|
| 173 |
+
</classpath>
|
| 174 |
+
<bottom><![CDATA[<font size="2"><a href="https://nlp.stanford.edu" target="_top">Stanford NLP Group</a></font>]]></bottom>
|
| 175 |
+
<link href="https://docs.oracle.com/javase/8/docs/api/"/>
|
| 176 |
+
</javadoc>
|
| 177 |
+
|
| 178 |
+
</target>
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
<!-- ==================== Prepare Target ================================== -->
|
| 182 |
+
|
| 183 |
+
<!--
|
| 184 |
+
|
| 185 |
+
The "prepare" target is used to create the "build" destination directory,
|
| 186 |
+
and copy the static contents of your web application to it. If you need
|
| 187 |
+
to copy static files from external dependencies, you can customize the
|
| 188 |
+
contents of this task.
|
| 189 |
+
|
| 190 |
+
Normally, this task is executed indirectly when needed.
|
| 191 |
+
|
| 192 |
+
-->
|
| 193 |
+
|
| 194 |
+
<target name="prepare">
|
| 195 |
+
|
| 196 |
+
<!-- Create build directories as needed -->
|
| 197 |
+
<mkdir dir="${build.home}"/>
|
| 198 |
+
|
| 199 |
+
</target>
|
| 200 |
+
|
| 201 |
+
</project>
|
CoreNLP/stanford-postagger-full-2020-11-17/data/enclitic-inflections.data
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
CoreNLP/stanford-postagger-full-2020-11-17/models/README-Models.txt
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Stanford POS Tagger, v4.2.0 - 2020-11-17
|
| 2 |
+
Copyright (c) 2002-2020 The Board of Trustees of
|
| 3 |
+
The Leland Stanford Junior University. All Rights Reserved.
|
| 4 |
+
|
| 5 |
+
This document contains (some) information about the models included in
|
| 6 |
+
this release and that may be downloaded from the POS tagger website at
|
| 7 |
+
http://nlp.stanford.edu/software/tagger.html . All of the models mentioned
|
| 8 |
+
in this document are in the downloaded package in the same directory as this
|
| 9 |
+
readme. All taggers are accompanied by the props files used to create
|
| 10 |
+
them; please examine these files for more detailed information about the
|
| 11 |
+
creation of the taggers.
|
| 12 |
+
|
| 13 |
+
For English, the bidirectional taggers are slightly more accurate, but
|
| 14 |
+
tag much more slowly; choose the appropriate tagger based on your
|
| 15 |
+
speed/performance needs.
|
| 16 |
+
|
| 17 |
+
English taggers
|
| 18 |
+
---------------------------
|
| 19 |
+
english-left3words-distsim.tagger
|
| 20 |
+
Trained on WSJ sections 0-18 and extra parser training data using the
|
| 21 |
+
left3words architecture and includes word shape and distributional
|
| 22 |
+
similarity features. Penn tagset. UDv2.0 tokenization standard.
|
| 23 |
+
|
| 24 |
+
english-bidirectional-distsim.tagger
|
| 25 |
+
Trained on WSJ sections 0-18 using a bidirectional architecture and
|
| 26 |
+
including word shape and distributional similarity features.
|
| 27 |
+
Penn Treebank tagset. UDv2.0 tokenization standard.
|
| 28 |
+
|
| 29 |
+
english-caseless-left3words-distsim.tagger
|
| 30 |
+
Trained on WSJ sections 0-18 and extra parser training data using the
|
| 31 |
+
left3words architecture and includes word shape and distributional
|
| 32 |
+
similarity features. Penn tagset. Ignores case. UDv2.0 tokenization
|
| 33 |
+
standard.
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
Chinese tagger
|
| 37 |
+
---------------------------
|
| 38 |
+
chinese-nodistsim.tagger
|
| 39 |
+
Trained on a combination of CTB7 texts from Chinese and Hong Kong
|
| 40 |
+
sources.
|
| 41 |
+
LDC Chinese Treebank POS tag set.
|
| 42 |
+
|
| 43 |
+
chinese-distsim.tagger
|
| 44 |
+
Trained on a combination of CTB7 texts from Chinese and Hong Kong
|
| 45 |
+
sources with distributional similarity clusters.
|
| 46 |
+
LDC Chinese Treebank POS tag set.
|
| 47 |
+
|
| 48 |
+
Arabic tagger
|
| 49 |
+
---------------------------
|
| 50 |
+
arabic.tagger
|
| 51 |
+
Trained on the *entire* ATB p1-3.
|
| 52 |
+
When trained on the train part of the ATB p1-3 split done for the 2005
|
| 53 |
+
JHU Summer Workshop (Diab split), using (augmented) Bies tags, it gets
|
| 54 |
+
|
| 55 |
+
French tagger
|
| 56 |
+
---------------------------
|
| 57 |
+
french-ud.tagger
|
| 58 |
+
Trained on the French GSD (UDv2.2) data set
|
| 59 |
+
|
| 60 |
+
German tagger
|
| 61 |
+
---------------------------
|
| 62 |
+
german-ud.tagger
|
| 63 |
+
Trained on the German GSD (UDv2.2) data set
|
| 64 |
+
|
| 65 |
+
Spanish tagger
|
| 66 |
+
--------------------------
|
| 67 |
+
spanish-ud.tagger
|
| 68 |
+
Trained on the Spanish AnCora (UDv2.0) data set
|
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0ea63d5a2b78cc06565accfaac5abdd64b031a422d0e09e189594098671e5e6
|
| 3 |
+
size 2577597
|
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Wed Jan 03 12:02:30 PST 2018 with arguments:
|
| 2 |
+
model = arabic-train.tagger
|
| 3 |
+
arch = words(-2,2),order(1),prefix(6),suffix(6),unicodeshapes(1)
|
| 4 |
+
wordFunction =
|
| 5 |
+
trainFile = format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Train.utf8.txt
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 1
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = /
|
| 12 |
+
encoding = UTF-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = arabic
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 3
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 3
|
| 19 |
+
rareWordThresh = 5
|
| 20 |
+
search = owlqn
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.75
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = false
|
| 26 |
+
tokenizerFactory = edu.stanford.nlp.process.WhitespaceTokenizer
|
| 27 |
+
tokenizerOptions =
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput =
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d07351166e384349a0e1a5d1a8b2987f88a5dcbad9bfb44e24444028802dbd41
|
| 3 |
+
size 2944508
|
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Wed Jan 03 12:24:48 PST 2018 with arguments:
|
| 2 |
+
model = arabic.tagger
|
| 3 |
+
arch = words(-2,2),order(1),prefix(6),suffix(6),unicodeshapes(1)
|
| 4 |
+
wordFunction =
|
| 5 |
+
trainFile = format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Train.utf8.txt;format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Dev.utf8.txt;format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Test.utf8.txt
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 1
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = /
|
| 12 |
+
encoding = UTF-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = arabic
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 3
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 3
|
| 19 |
+
rareWordThresh = 5
|
| 20 |
+
search = owlqn
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.75
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = false
|
| 26 |
+
tokenizerFactory = edu.stanford.nlp.process.WhitespaceTokenizer
|
| 27 |
+
tokenizerOptions =
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput =
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca40fdab701679ac93e9f075da134e2906f3d1a88b084bc0c018506fa7a68e4b
|
| 3 |
+
size 13866197
|
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Fri Feb 14 01:19:49 PST 2014 with arguments:
|
| 2 |
+
model = chinese-distsim.tagger
|
| 3 |
+
arch = generic,suffix(4),prefix(4),unicodeshapes(-1,1),unicodeshapeconjunction(-1,1),words(-2,-2),words(2,2),distsim(/u/nlp/data/chinese/distsim/xin_cmn_2000-2010.ldc.seg.utf8.1M.random-c1000,-1,1),distsimconjunction(/u/nlp/data/chinese/distsim/xin_cmn_2000-2010.ldc.seg.utf8.1M.random-c1000,-1,1)
|
| 4 |
+
wordFunction = edu.stanford.nlp.util.UTF8EquivalenceFunction
|
| 5 |
+
trainFile = format=TREES,/u/nlp/data/chinese/ctb7/train.mrg
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 1
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = #
|
| 12 |
+
encoding = utf-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = chinese
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 3
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 3
|
| 19 |
+
rareWordThresh = 20
|
| 20 |
+
search = owlqn
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.75
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = false
|
| 26 |
+
tokenizerFactory =
|
| 27 |
+
tokenizerOptions =
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput = null
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db4178289b565a439f4bd2f2216d770e37f8ae568dcc34b1278c1489d812a8ca
|
| 3 |
+
size 3963731
|
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Fri Feb 14 02:20:03 PST 2014 with arguments:
|
| 2 |
+
model = chinese-nodistsim.tagger
|
| 3 |
+
arch = generic,suffix(4),prefix(4),unicodeshapes(-1,1),unicodeshapeconjunction(-1,1),words(-2,-2),words(2,2)
|
| 4 |
+
wordFunction = edu.stanford.nlp.util.UTF8EquivalenceFunction
|
| 5 |
+
trainFile = format=TREES,/u/nlp/data/chinese/ctb7/train.mrg
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 1
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = #
|
| 12 |
+
encoding = utf-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = chinese
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 3
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 3
|
| 19 |
+
rareWordThresh = 20
|
| 20 |
+
search = owlqn
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.75
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = false
|
| 26 |
+
tokenizerFactory =
|
| 27 |
+
tokenizerOptions =
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput = null
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a363a88c67b25e793a9382254485d897e95b0e166cc34af314ec3d53d79899b1
|
| 3 |
+
size 20045086
|
CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-bidirectional-distsim-prod1.tagger
|
| 2 |
+
arch = bidirectional5words,allwordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUCase),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CompanyNameDetector),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorAllCapitalized),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUpperDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorStartSentenceCap),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCapC),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCap),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
|
| 3 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 4 |
+
trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/handparsed-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/train-tech-english.txt
|
| 5 |
+
closedClassTags =
|
| 6 |
+
closedClassTagThreshold = 40
|
| 7 |
+
curWordMinFeatureThresh = 2
|
| 8 |
+
debug = false
|
| 9 |
+
debugPrefix =
|
| 10 |
+
tagSeparator = _
|
| 11 |
+
encoding = UTF-8
|
| 12 |
+
iterations = 100
|
| 13 |
+
lang = english
|
| 14 |
+
learnClosedClassTags = false
|
| 15 |
+
minFeatureThresh = 2
|
| 16 |
+
openClassTags =
|
| 17 |
+
rareWordMinFeatureThresh = 5
|
| 18 |
+
rareWordThresh = 5
|
| 19 |
+
search = owlqn
|
| 20 |
+
sgml = false
|
| 21 |
+
sigmaSquared = 0.5
|
| 22 |
+
regL1 = 0.75
|
| 23 |
+
tagInside =
|
| 24 |
+
tokenize = true
|
| 25 |
+
tokenizerFactory =
|
| 26 |
+
tokenizerOptions =
|
| 27 |
+
verbose = false
|
| 28 |
+
verboseResults = true
|
| 29 |
+
veryCommonWordThresh = 250
|
| 30 |
+
xmlInput =
|
| 31 |
+
outputFile =
|
| 32 |
+
outputFormat = slashTags
|
| 33 |
+
outputFormatOptions =
|
| 34 |
+
nthreads = 1
|
| 35 |
+
minWordsLockTags = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f57ba3bca617dc8732b7d1c66f16a0f147cccbc19315938c0e03a6d4569c0488
|
| 3 |
+
size 13937235
|
CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger.props
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Sat Feb 08 23:44:06 PST 2020 with arguments:
|
| 2 |
+
model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-caseless-left3words-distsim-prod2.tagger
|
| 3 |
+
arch = left3words,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CaselessCompanyNameDetector),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
|
| 4 |
+
wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
|
| 5 |
+
trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/train-tech-english.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/handparsed-train.txt
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 2
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = _
|
| 12 |
+
encoding = UTF-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = english
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 2
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 5
|
| 19 |
+
rareWordThresh = 5
|
| 20 |
+
search = owlqn
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.75
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = true
|
| 26 |
+
tokenizerFactory =
|
| 27 |
+
tokenizerOptions =
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput =
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 1
|
| 36 |
+
minWordsLockTags = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebb5f7454da95775ecdb3ee20d3c58488cd87aa9999585951645f949e962089f
|
| 3 |
+
size 15198877
|
CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-left3words-distsim-prod1.tagger
|
| 2 |
+
arch = left3words,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUCase),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CompanyNameDetector),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorAllCapitalized),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUpperDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorStartSentenceCap),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCapC),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCap),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
|
| 3 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 4 |
+
trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/handparsed-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/train-tech-english.txt
|
| 5 |
+
closedClassTags =
|
| 6 |
+
closedClassTagThreshold = 40
|
| 7 |
+
curWordMinFeatureThresh = 2
|
| 8 |
+
debug = false
|
| 9 |
+
debugPrefix =
|
| 10 |
+
tagSeparator = _
|
| 11 |
+
encoding = UTF-8
|
| 12 |
+
iterations = 100
|
| 13 |
+
lang = english
|
| 14 |
+
learnClosedClassTags = false
|
| 15 |
+
minFeatureThresh = 2
|
| 16 |
+
openClassTags =
|
| 17 |
+
rareWordMinFeatureThresh = 5
|
| 18 |
+
rareWordThresh = 5
|
| 19 |
+
search = owlqn
|
| 20 |
+
sgml = false
|
| 21 |
+
sigmaSquared = 0.5
|
| 22 |
+
regL1 = 0.75
|
| 23 |
+
tagInside =
|
| 24 |
+
tokenize = true
|
| 25 |
+
tokenizerFactory =
|
| 26 |
+
tokenizerOptions =
|
| 27 |
+
verbose = false
|
| 28 |
+
verboseResults = true
|
| 29 |
+
veryCommonWordThresh = 250
|
| 30 |
+
xmlInput =
|
| 31 |
+
outputFile =
|
| 32 |
+
outputFormat = slashTags
|
| 33 |
+
outputFormatOptions =
|
| 34 |
+
nthreads = 1
|
| 35 |
+
minWordsLockTags = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b71ac1193d406f368d0525edc2ab295e7138c184c06a27f6363caa88429d4526
|
| 3 |
+
size 1591008
|
CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Mon Jul 01 00:54:56 PDT 2019 with arguments:
|
| 2 |
+
model = french-ud.tagger
|
| 3 |
+
arch = left3words,naacl2003unknowns,unicodeshapes(-1,1)
|
| 4 |
+
wordFunction =
|
| 5 |
+
trainFile = format=TSV,wordColumn=1,tagColumn=3,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/fr_gsd-ud-train.conllu.clean
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 2
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = _
|
| 12 |
+
encoding = utf-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = french
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 2
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 10
|
| 19 |
+
rareWordThresh = 5
|
| 20 |
+
search = owlqn2
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.75
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = true
|
| 26 |
+
tokenizerFactory =
|
| 27 |
+
tokenizerOptions = asciiQuotes
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput = null
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1537adaf468f4005882207c88fb3a7eda93d82764b24d1de7900776131fc8878
|
| 3 |
+
size 71524450
|
CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Mon Jul 01 01:05:00 PDT 2019 with arguments:
|
| 2 |
+
model = german-ud.tagger
|
| 3 |
+
arch = left3words,naacl2003unknowns,unicodeshapes(-2,2),distsim(/u/nlp/data/german/ner/hgc_175m_600,-1,1),distsimconjunction(/u/nlp/data/german/ner/hgc_175m_600,-1,1),unicodeshapeconjunction(-1,1)
|
| 4 |
+
wordFunction =
|
| 5 |
+
trainFile = format=TSV,wordColumn=1,tagColumn=3,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/de_gsd-ud-train.conllu.clean
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 2
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = _
|
| 12 |
+
encoding = utf-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = german
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 2
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 10
|
| 19 |
+
rareWordThresh = 5
|
| 20 |
+
search = owlqn2
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.625
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = true
|
| 26 |
+
tokenizerFactory =
|
| 27 |
+
tokenizerOptions = asciiQuotes
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput = null
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 1
|
CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5dba9b09f6cb0e0f58ee0d694ad5c920dec3a08c89c952c64fa52b67011e7e5d
|
| 3 |
+
size 9443457
|
CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Mon Jul 01 01:04:01 PDT 2019 with arguments:
|
| 2 |
+
model = spanish-ud.tagger
|
| 3 |
+
arch = left3words,naacl2003unknowns,allwordshapes(-1,1),distsim(/u/nlp/data/spanish/distsim/spanish.spence512.cls,-1,1),distsimconjunction(/u/nlp/data/spanish/distsim/spanish.spence512.cls,-1,1)
|
| 4 |
+
wordFunction =
|
| 5 |
+
trainFile = format=TSV,wordColumn=1,tagColumn=4,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/es_ancora-ud-train.conllu.clean
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 2
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = _
|
| 12 |
+
encoding = utf-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = spanish
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 2
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 10
|
| 19 |
+
rareWordThresh = 5
|
| 20 |
+
search = owlqn2
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.75
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = true
|
| 26 |
+
tokenizerFactory =
|
| 27 |
+
tokenizerOptions = asciiQuotes
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput = null
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 16
|
CoreNLP/stanford-postagger-full-2020-11-17/sample-input.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
A passenger plane has crashed shortly after take-off from Kyrgyzstan's
|
| 2 |
+
capital, Bishkek, killing a large number of those on board. The head of
|
| 3 |
+
Kyrgyzstan's civil aviation authority said that out of about 90
|
| 4 |
+
passengers and crew, only about 20 people have survived. The Itek Air
|
| 5 |
+
Boeing 737 took off bound for Mashhad, in north-eastern Iran, but turned
|
| 6 |
+
round some 10 minutes later.
|
CoreNLP/stanford-postagger-full-2020-11-17/sample-output.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
A_DT passenger_NN plane_NN has_VBZ crashed_VBN shortly_RB after_IN take-off_NN from_IN Kyrgyzstan_NNP 's_POS capital_NN ,_, Bishkek_NNP ,_, killing_VBG a_DT large_JJ number_NN of_IN those_DT on_IN board_NN ._.
|
| 2 |
+
The_DT head_NN of_IN Kyrgyzstan_NNP 's_POS civil_JJ aviation_NN authority_NN said_VBD that_IN out_IN of_IN about_IN 90_CD passengers_NNS and_CC crew_NN ,_, only_RB about_IN 20_CD people_NNS have_VBP survived_VBN ._.
|
| 3 |
+
The_DT Itek_NNP Air_NNP Boeing_NNP 737_CD took_VBD off_RP bound_VBN for_IN Mashhad_NNP ,_, in_IN north-eastern_JJ Iran_NNP ,_, but_CC turned_VBD round_NN some_DT 10_CD minutes_NNS later_RB ._.
|
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:197a84a195a7fbdef461144b07f8b7475b7145f3643a45f5ead295367fe6d323
|
| 3 |
+
size 4429314
|
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e7f56b55bd3ec2a5236c7a57b28923bce6a2d72d9faa5ab6fa98309ff5f25e4
|
| 3 |
+
size 2919886
|
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6090106c57da13d2ac8a1b2798dd7f437e07a9909a00f917e884bf6fa52fc8d
|
| 3 |
+
size 3650039
|
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.bat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
:: runs the POS tagger (toy) GUI
|
| 2 |
+
:: usage stanford-postagger-gui
|
| 3 |
+
java -mx200m -cp "stanford-postagger.jar;" edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI
|
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.sh
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
|
| 2 |
+
java -mx200m -cp 'stanford-postagger.jar:' edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI
|
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.bat
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
:: usage: stanford-postagger model textFile
|
| 2 |
+
:: e.g., stanford-postagger models\english-left3words-distsim.tagger sample-input.txt
|
| 3 |
+
|
| 4 |
+
java -mx300m -cp "stanford-postagger.jar;" edu.stanford.nlp.tagger.maxent.MaxentTagger -model %1 -textFile %2
|
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6090106c57da13d2ac8a1b2798dd7f437e07a9909a00f917e884bf6fa52fc8d
|
| 3 |
+
size 3650039
|
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.sh
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
|
| 2 |
+
#
|
| 3 |
+
# usage: ./stanford-postagger.sh model textFile
|
| 4 |
+
# e.g., ./stanford-postagger.sh models/english-left3words-distsim.tagger sample-input.txt
|
| 5 |
+
|
| 6 |
+
java -mx300m -cp 'stanford-postagger.jar:' edu.stanford.nlp.tagger.maxent.MaxentTagger -model $1 -textFile $2
|
UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll
ADDED
|
Binary file (76.8 kB). View file
|
|
|
UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll.config
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="utf-8"?>
|
| 2 |
+
<configuration>
|
| 3 |
+
<runtime>
|
| 4 |
+
<assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
|
| 5 |
+
<dependentAssembly>
|
| 6 |
+
<assemblyIdentity name="System.Diagnostics.DiagnosticSource" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
|
| 7 |
+
<bindingRedirect oldVersion="0.0.0.0-4.0.3.1" newVersion="4.0.3.1" />
|
| 8 |
+
</dependentAssembly>
|
| 9 |
+
<dependentAssembly>
|
| 10 |
+
<assemblyIdentity name="Polenter.SharpSerializer" publicKeyToken="8f4f20011571ee5f" culture="neutral" />
|
| 11 |
+
<bindingRedirect oldVersion="0.0.0.0-3.0.1.0" newVersion="3.0.1.0" />
|
| 12 |
+
</dependentAssembly>
|
| 13 |
+
<dependentAssembly>
|
| 14 |
+
<assemblyIdentity name="Telerik.Windows.Documents.Flow" publicKeyToken="5803cfa389c90ce7" culture="neutral" />
|
| 15 |
+
<bindingRedirect oldVersion="0.0.0.0-2018.2.511.40" newVersion="2018.2.511.40" />
|
| 16 |
+
</dependentAssembly>
|
| 17 |
+
<dependentAssembly>
|
| 18 |
+
<assemblyIdentity name="Telerik.Windows.Documents.Core" publicKeyToken="5803cfa389c90ce7" culture="neutral" />
|
| 19 |
+
<bindingRedirect oldVersion="0.0.0.0-2018.2.511.40" newVersion="2018.2.511.40" />
|
| 20 |
+
</dependentAssembly>
|
| 21 |
+
<dependentAssembly>
|
| 22 |
+
<assemblyIdentity name="Newtonsoft.Json" publicKeyToken="30ad4fe6b2a6aeed" culture="neutral" />
|
| 23 |
+
<bindingRedirect oldVersion="0.0.0.0-13.0.0.0" newVersion="13.0.0.0" />
|
| 24 |
+
</dependentAssembly>
|
| 25 |
+
<dependentAssembly>
|
| 26 |
+
<assemblyIdentity name="PostSharp" publicKeyToken="b13fd38b8f9c99d7" culture="neutral" />
|
| 27 |
+
<bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
|
| 28 |
+
</dependentAssembly>
|
| 29 |
+
<dependentAssembly>
|
| 30 |
+
<assemblyIdentity name="PostSharp.Patterns.Aggregation" publicKeyToken="e7f631e6ce13f078" culture="neutral" />
|
| 31 |
+
<bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
|
| 32 |
+
</dependentAssembly>
|
| 33 |
+
<dependentAssembly>
|
| 34 |
+
<assemblyIdentity name="PostSharp.Patterns.Common" publicKeyToken="e7f631e6ce13f078" culture="neutral" />
|
| 35 |
+
<bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
|
| 36 |
+
</dependentAssembly>
|
| 37 |
+
<dependentAssembly>
|
| 38 |
+
<assemblyIdentity name="System.Runtime.CompilerServices.Unsafe" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
|
| 39 |
+
<bindingRedirect oldVersion="0.0.0.0-6.0.0.0" newVersion="6.0.0.0" />
|
| 40 |
+
</dependentAssembly>
|
| 41 |
+
<dependentAssembly>
|
| 42 |
+
<assemblyIdentity name="K4os.Hash.xxHash" publicKeyToken="32cd54395057cec3" culture="neutral" />
|
| 43 |
+
<bindingRedirect oldVersion="0.0.0.0-1.0.8.0" newVersion="1.0.8.0" />
|
| 44 |
+
</dependentAssembly>
|
| 45 |
+
<dependentAssembly>
|
| 46 |
+
<assemblyIdentity name="System.Memory" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
|
| 47 |
+
<bindingRedirect oldVersion="0.0.0.0-4.0.1.2" newVersion="4.0.1.2" />
|
| 48 |
+
</dependentAssembly>
|
| 49 |
+
<dependentAssembly>
|
| 50 |
+
<assemblyIdentity name="System.Net.Http" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
|
| 51 |
+
<bindingRedirect oldVersion="0.0.0.0-4.1.0.3" newVersion="4.1.0.3" />
|
| 52 |
+
</dependentAssembly>
|
| 53 |
+
<dependentAssembly>
|
| 54 |
+
<assemblyIdentity name="System.Buffers" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
|
| 55 |
+
<bindingRedirect oldVersion="0.0.0.0-4.0.3.0" newVersion="4.0.3.0" />
|
| 56 |
+
</dependentAssembly>
|
| 57 |
+
<dependentAssembly>
|
| 58 |
+
<assemblyIdentity name="System.IO.Pipelines" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
|
| 59 |
+
<bindingRedirect oldVersion="0.0.0.0-8.0.0.0" newVersion="8.0.0.0" />
|
| 60 |
+
</dependentAssembly>
|
| 61 |
+
<dependentAssembly>
|
| 62 |
+
<assemblyIdentity name="Microsoft.Bcl.AsyncInterfaces" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
|
| 63 |
+
<bindingRedirect oldVersion="0.0.0.0-8.0.0.0" newVersion="8.0.0.0" />
|
| 64 |
+
</dependentAssembly>
|
| 65 |
+
<dependentAssembly>
|
| 66 |
+
<assemblyIdentity name="System.Security.Cryptography.X509Certificates" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
|
| 67 |
+
<bindingRedirect oldVersion="0.0.0.0-4.1.1.2" newVersion="4.1.1.2" />
|
| 68 |
+
</dependentAssembly>
|
| 69 |
+
<dependentAssembly>
|
| 70 |
+
<assemblyIdentity name="Microsoft.Win32.Primitives" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
|
| 71 |
+
<bindingRedirect oldVersion="0.0.0.0-4.0.1.0" newVersion="4.0.1.0" />
|
| 72 |
+
</dependentAssembly>
|
| 73 |
+
<dependentAssembly>
|
| 74 |
+
<assemblyIdentity name="System.Security.Cryptography.Encoding" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
|
| 75 |
+
<bindingRedirect oldVersion="0.0.0.0-4.0.1.0" newVersion="4.0.1.0" />
|
| 76 |
+
</dependentAssembly>
|
| 77 |
+
<dependentAssembly>
|
| 78 |
+
<assemblyIdentity name="System.IO.Compression" publicKeyToken="b77a5c561934e089" culture="neutral" />
|
| 79 |
+
<bindingRedirect oldVersion="0.0.0.0-4.1.2.0" newVersion="4.1.2.0" />
|
| 80 |
+
</dependentAssembly>
|
| 81 |
+
</assemblyBinding>
|
| 82 |
+
</runtime>
|
| 83 |
+
<startup><supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" /></startup></configuration>
|
UDPipe/v1/XDependencies/UDPipe/AUTHORS
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Milan Straka <[email protected]>
|
| 2 |
+
Jana Straková <[email protected]>
|