niobures committed
Commit ad3b597 · verified · 1 Parent(s): fe73573

CoreNLP, UDPipe

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +104 -0
  2. CoreNLP/models/ru/parser/nndep.rus.model81_mf.txt.gz +3 -0
  3. CoreNLP/models/ru/parser/nndep.rus.model90.9_88.6.txt.gz +3 -0
  4. CoreNLP/models/ru/parser/nndep.rus.modelAr100HS400.txt.gz +3 -0
  5. CoreNLP/models/ru/parser/nndep.rus.modelMFAr100HS400_81.txt.gz +3 -0
  6. CoreNLP/models/ru/parser/nndep.rus.modelMFWiki100HS400_80.txt.gz +3 -0
  7. CoreNLP/models/ru/tagger/dict.tsv +3 -0
  8. CoreNLP/models/ru/tagger/russian-ud-mf.tagger +3 -0
  9. CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger +3 -0
  10. CoreNLP/models/ru/tagger/russian-ud-pos.tagger +3 -0
  11. CoreNLP/stanford-postagger-full-2020-11-17/LICENSE.txt +339 -0
  12. CoreNLP/stanford-postagger-full-2020-11-17/README.txt +315 -0
  13. CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo.java +32 -0
  14. CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo2.java +62 -0
  15. CoreNLP/stanford-postagger-full-2020-11-17/build.xml +201 -0
  16. CoreNLP/stanford-postagger-full-2020-11-17/data/enclitic-inflections.data +0 -0
  17. CoreNLP/stanford-postagger-full-2020-11-17/models/README-Models.txt +68 -0
  18. CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger +3 -0
  19. CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger.props +35 -0
  20. CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger +3 -0
  21. CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger.props +35 -0
  22. CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger +3 -0
  23. CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger.props +35 -0
  24. CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger +3 -0
  25. CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger.props +35 -0
  26. CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger +3 -0
  27. CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger.props +35 -0
  28. CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger +3 -0
  29. CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger.props +36 -0
  30. CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger +3 -0
  31. CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger.props +35 -0
  32. CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger +3 -0
  33. CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger.props +35 -0
  34. CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger +3 -0
  35. CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger.props +35 -0
  36. CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger +3 -0
  37. CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger.props +35 -0
  38. CoreNLP/stanford-postagger-full-2020-11-17/sample-input.txt +6 -0
  39. CoreNLP/stanford-postagger-full-2020-11-17/sample-output.txt +3 -0
  40. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar +3 -0
  41. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar +3 -0
  42. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar +3 -0
  43. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.bat +3 -0
  44. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.sh +2 -0
  45. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.bat +4 -0
  46. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar +3 -0
  47. CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.sh +6 -0
  48. UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll +0 -0
  49. UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll.config +83 -0
  50. UDPipe/v1/XDependencies/UDPipe/AUTHORS +2 -0
.gitattributes CHANGED
@@ -33,3 +33,107 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ CoreNLP/models/ru/tagger/dict.tsv filter=lfs diff=lfs merge=lfs -text
37
+ CoreNLP/models/ru/tagger/russian-ud-mf.tagger filter=lfs diff=lfs merge=lfs -text
38
+ CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger filter=lfs diff=lfs merge=lfs -text
39
+ CoreNLP/models/ru/tagger/russian-ud-pos.tagger filter=lfs diff=lfs merge=lfs -text
40
+ CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger filter=lfs diff=lfs merge=lfs -text
41
+ CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger filter=lfs diff=lfs merge=lfs -text
42
+ CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger filter=lfs diff=lfs merge=lfs -text
43
+ CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger filter=lfs diff=lfs merge=lfs -text
44
+ CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger filter=lfs diff=lfs merge=lfs -text
45
+ CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
46
+ CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
47
+ CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger filter=lfs diff=lfs merge=lfs -text
48
+ CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger filter=lfs diff=lfs merge=lfs -text
49
+ CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger filter=lfs diff=lfs merge=lfs -text
50
+ CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar filter=lfs diff=lfs merge=lfs -text
51
+ CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar filter=lfs diff=lfs merge=lfs -text
52
+ CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar filter=lfs diff=lfs merge=lfs -text
53
+ CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar filter=lfs diff=lfs merge=lfs -text
54
+ UDPipe/v1/binary/linux32/csharp/libudpipe_csharp.so filter=lfs diff=lfs merge=lfs -text
55
+ UDPipe/v1/binary/linux32/java/libudpipe_java.so filter=lfs diff=lfs merge=lfs -text
56
+ UDPipe/v1/binary/linux32/udpipe filter=lfs diff=lfs merge=lfs -text
57
+ UDPipe/v1/binary/linux64/csharp/libudpipe_csharp.so filter=lfs diff=lfs merge=lfs -text
58
+ UDPipe/v1/binary/linux64/java/libudpipe_java.so filter=lfs diff=lfs merge=lfs -text
59
+ UDPipe/v1/binary/linux64/udpipe filter=lfs diff=lfs merge=lfs -text
60
+ UDPipe/v1/binary/linuxArmv7/libudpipe.a filter=lfs diff=lfs merge=lfs -text
61
+ UDPipe/v1/binary/linuxArmv7/rest_server/udpipe_server filter=lfs diff=lfs merge=lfs -text
62
+ UDPipe/v1/binary/linuxArmv7/udpipe filter=lfs diff=lfs merge=lfs -text
63
+ UDPipe/v1/binary/osx/csharp/libudpipe_csharp.dylib filter=lfs diff=lfs merge=lfs -text
64
+ UDPipe/v1/binary/osx/java/libudpipe_java.dylib filter=lfs diff=lfs merge=lfs -text
65
+ UDPipe/v1/binary/osx/udpipe filter=lfs diff=lfs merge=lfs -text
66
+ UDPipe/v1/binary/win64/csharp/udpipe_csharp.dll filter=lfs diff=lfs merge=lfs -text
67
+ UDPipe/v1/binary/win64/java/udpipe_java.dll filter=lfs diff=lfs merge=lfs -text
68
+ UDPipe/v1/binary/win64/udpipe.exe filter=lfs diff=lfs merge=lfs -text
69
+ UDPipe/v1/udpipe_csharp/x64/udpipe_csharp.dll filter=lfs diff=lfs merge=lfs -text
70
+ UDPipe/v1/XDependencies/UDPipe/MANUAL.pdf filter=lfs diff=lfs merge=lfs -text
71
+ UDPipe/v1/XDependencies/UDPipe/Models/Ancient_greek-PROIEL.udpipe filter=lfs diff=lfs merge=lfs -text
72
+ UDPipe/v1/XDependencies/UDPipe/Models/Ancient_greek.udpipe filter=lfs diff=lfs merge=lfs -text
73
+ UDPipe/v1/XDependencies/UDPipe/Models/Arabic.udpipe filter=lfs diff=lfs merge=lfs -text
74
+ UDPipe/v1/XDependencies/UDPipe/Models/Basque.udpipe filter=lfs diff=lfs merge=lfs -text
75
+ UDPipe/v1/XDependencies/UDPipe/Models/Belarusian.udpipe filter=lfs diff=lfs merge=lfs -text
76
+ UDPipe/v1/XDependencies/UDPipe/Models/Bulgarian.udpipe filter=lfs diff=lfs merge=lfs -text
77
+ UDPipe/v1/XDependencies/UDPipe/Models/Catalan.udpipe filter=lfs diff=lfs merge=lfs -text
78
+ UDPipe/v1/XDependencies/UDPipe/Models/Chinese.udpipe filter=lfs diff=lfs merge=lfs -text
79
+ UDPipe/v1/XDependencies/UDPipe/Models/Coptic.udpipe filter=lfs diff=lfs merge=lfs -text
80
+ UDPipe/v1/XDependencies/UDPipe/Models/Croatian.udpipe filter=lfs diff=lfs merge=lfs -text
81
+ UDPipe/v1/XDependencies/UDPipe/Models/Czech-CAC.udpipe filter=lfs diff=lfs merge=lfs -text
82
+ UDPipe/v1/XDependencies/UDPipe/Models/Czech-CLTT.udpipe filter=lfs diff=lfs merge=lfs -text
83
+ UDPipe/v1/XDependencies/UDPipe/Models/Czech.udpipe filter=lfs diff=lfs merge=lfs -text
84
+ UDPipe/v1/XDependencies/UDPipe/Models/Danish.udpipe filter=lfs diff=lfs merge=lfs -text
85
+ UDPipe/v1/XDependencies/UDPipe/Models/Dutch-LASSYSMALL.udpipe filter=lfs diff=lfs merge=lfs -text
86
+ UDPipe/v1/XDependencies/UDPipe/Models/Dutch.udpipe filter=lfs diff=lfs merge=lfs -text
87
+ UDPipe/v1/XDependencies/UDPipe/Models/English-LINES.udpipe filter=lfs diff=lfs merge=lfs -text
88
+ UDPipe/v1/XDependencies/UDPipe/Models/English-PARTUT.udpipe filter=lfs diff=lfs merge=lfs -text
89
+ UDPipe/v1/XDependencies/UDPipe/Models/English.udpipe filter=lfs diff=lfs merge=lfs -text
90
+ UDPipe/v1/XDependencies/UDPipe/Models/Estonian.udpipe filter=lfs diff=lfs merge=lfs -text
91
+ UDPipe/v1/XDependencies/UDPipe/Models/Finnish-FTB.udpipe filter=lfs diff=lfs merge=lfs -text
92
+ UDPipe/v1/XDependencies/UDPipe/Models/Finnish.udpipe filter=lfs diff=lfs merge=lfs -text
93
+ UDPipe/v1/XDependencies/UDPipe/Models/French-PARTUT.udpipe filter=lfs diff=lfs merge=lfs -text
94
+ UDPipe/v1/XDependencies/UDPipe/Models/French-SEQUOIA.udpipe filter=lfs diff=lfs merge=lfs -text
95
+ UDPipe/v1/XDependencies/UDPipe/Models/French.udpipe filter=lfs diff=lfs merge=lfs -text
96
+ UDPipe/v1/XDependencies/UDPipe/Models/Galician-TREEGAL.udpipe filter=lfs diff=lfs merge=lfs -text
97
+ UDPipe/v1/XDependencies/UDPipe/Models/Galician.udpipe filter=lfs diff=lfs merge=lfs -text
98
+ UDPipe/v1/XDependencies/UDPipe/Models/German.udpipe filter=lfs diff=lfs merge=lfs -text
99
+ UDPipe/v1/XDependencies/UDPipe/Models/Gothic.udpipe filter=lfs diff=lfs merge=lfs -text
100
+ UDPipe/v1/XDependencies/UDPipe/Models/Greek.udpipe filter=lfs diff=lfs merge=lfs -text
101
+ UDPipe/v1/XDependencies/UDPipe/Models/Hebrew.udpipe filter=lfs diff=lfs merge=lfs -text
102
+ UDPipe/v1/XDependencies/UDPipe/Models/Hindi.udpipe filter=lfs diff=lfs merge=lfs -text
103
+ UDPipe/v1/XDependencies/UDPipe/Models/Hungarian.udpipe filter=lfs diff=lfs merge=lfs -text
104
+ UDPipe/v1/XDependencies/UDPipe/Models/Indonesian.udpipe filter=lfs diff=lfs merge=lfs -text
105
+ UDPipe/v1/XDependencies/UDPipe/Models/Irish.udpipe filter=lfs diff=lfs merge=lfs -text
106
+ UDPipe/v1/XDependencies/UDPipe/Models/Italian.udpipe filter=lfs diff=lfs merge=lfs -text
107
+ UDPipe/v1/XDependencies/UDPipe/Models/Japanese.udpipe filter=lfs diff=lfs merge=lfs -text
108
+ UDPipe/v1/XDependencies/UDPipe/Models/Kazakh.udpipe filter=lfs diff=lfs merge=lfs -text
109
+ UDPipe/v1/XDependencies/UDPipe/Models/Korean.udpipe filter=lfs diff=lfs merge=lfs -text
110
+ UDPipe/v1/XDependencies/UDPipe/Models/Latin-ITTB.udpipe filter=lfs diff=lfs merge=lfs -text
111
+ UDPipe/v1/XDependencies/UDPipe/Models/Latin-PROIEL.udpipe filter=lfs diff=lfs merge=lfs -text
112
+ UDPipe/v1/XDependencies/UDPipe/Models/Latin.udpipe filter=lfs diff=lfs merge=lfs -text
113
+ UDPipe/v1/XDependencies/UDPipe/Models/Latvian.udpipe filter=lfs diff=lfs merge=lfs -text
114
+ UDPipe/v1/XDependencies/UDPipe/Models/Lithuanian.udpipe filter=lfs diff=lfs merge=lfs -text
115
+ UDPipe/v1/XDependencies/UDPipe/Models/Norwegian-BOKMAAL.udpipe filter=lfs diff=lfs merge=lfs -text
116
+ UDPipe/v1/XDependencies/UDPipe/Models/Norwegian-NYNORSK.udpipe filter=lfs diff=lfs merge=lfs -text
117
+ UDPipe/v1/XDependencies/UDPipe/Models/Old_church_slavonic.udpipe filter=lfs diff=lfs merge=lfs -text
118
+ UDPipe/v1/XDependencies/UDPipe/Models/Persian.udpipe filter=lfs diff=lfs merge=lfs -text
119
+ UDPipe/v1/XDependencies/UDPipe/Models/Polish.udpipe filter=lfs diff=lfs merge=lfs -text
120
+ UDPipe/v1/XDependencies/UDPipe/Models/Portuguese-BR.udpipe filter=lfs diff=lfs merge=lfs -text
121
+ UDPipe/v1/XDependencies/UDPipe/Models/Portuguese.udpipe filter=lfs diff=lfs merge=lfs -text
122
+ UDPipe/v1/XDependencies/UDPipe/Models/Romanian.udpipe filter=lfs diff=lfs merge=lfs -text
123
+ UDPipe/v1/XDependencies/UDPipe/Models/Russian-SYNTAGRUS.udpipe filter=lfs diff=lfs merge=lfs -text
124
+ UDPipe/v1/XDependencies/UDPipe/Models/Russian.udpipe filter=lfs diff=lfs merge=lfs -text
125
+ UDPipe/v1/XDependencies/UDPipe/Models/Sanskrit.udpipe filter=lfs diff=lfs merge=lfs -text
126
+ UDPipe/v1/XDependencies/UDPipe/Models/Slovak.udpipe filter=lfs diff=lfs merge=lfs -text
127
+ UDPipe/v1/XDependencies/UDPipe/Models/Slovenian-SST.udpipe filter=lfs diff=lfs merge=lfs -text
128
+ UDPipe/v1/XDependencies/UDPipe/Models/Slovenian.udpipe filter=lfs diff=lfs merge=lfs -text
129
+ UDPipe/v1/XDependencies/UDPipe/Models/Spanish-ANCORA.udpipe filter=lfs diff=lfs merge=lfs -text
130
+ UDPipe/v1/XDependencies/UDPipe/Models/Spanish.udpipe filter=lfs diff=lfs merge=lfs -text
131
+ UDPipe/v1/XDependencies/UDPipe/Models/Swedish-LINES.udpipe filter=lfs diff=lfs merge=lfs -text
132
+ UDPipe/v1/XDependencies/UDPipe/Models/Swedish.udpipe filter=lfs diff=lfs merge=lfs -text
133
+ UDPipe/v1/XDependencies/UDPipe/Models/Tamil.udpipe filter=lfs diff=lfs merge=lfs -text
134
+ UDPipe/v1/XDependencies/UDPipe/Models/Turkish.udpipe filter=lfs diff=lfs merge=lfs -text
135
+ UDPipe/v1/XDependencies/UDPipe/Models/Ukrainian.udpipe filter=lfs diff=lfs merge=lfs -text
136
+ UDPipe/v1/XDependencies/UDPipe/Models/Urdu.udpipe filter=lfs diff=lfs merge=lfs -text
137
+ UDPipe/v1/XDependencies/UDPipe/Models/Uyghur.udpipe filter=lfs diff=lfs merge=lfs -text
138
+ UDPipe/v1/XDependencies/UDPipe/Models/Vietnamese.udpipe filter=lfs diff=lfs merge=lfs -text
139
+ UDPipe/v1/XDependencies/UDPipe/udpipe.exe filter=lfs diff=lfs merge=lfs -text
CoreNLP/models/ru/parser/nndep.rus.model81_mf.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:326fc20cdd0602da6ea44279f4d04761670bb03d86297a9be59d8c921bd11580
3
+ size 115029311
CoreNLP/models/ru/parser/nndep.rus.model90.9_88.6.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f8e0cb78116c4fba174b6ab19dccc7867b7cea2dd44ccea7067ee949cb04e69
3
+ size 118576259
CoreNLP/models/ru/parser/nndep.rus.modelAr100HS400.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b09c5d84c44ca668372b78f8c20235fb3fe701df25169dd0182e4830043b29
3
+ size 127444031
CoreNLP/models/ru/parser/nndep.rus.modelMFAr100HS400_81.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e5ea42afc93a9f9a6c7c915451d1257e6455faaa3f01b06bd1643af3bafaff2
3
+ size 118710465
CoreNLP/models/ru/parser/nndep.rus.modelMFWiki100HS400_80.txt.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8afeae32c7675e237f5cef4f225e9ed92d8c66b80e37b06fe710cfcb6f558617
3
+ size 119027293
CoreNLP/models/ru/tagger/dict.tsv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01234d489dc349b61843573e725f2fd3e265e004a1408a6712c5f6fafca5d506
3
+ size 129696832
CoreNLP/models/ru/tagger/russian-ud-mf.tagger ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd10da6d84ec2f8d150d48147dac16d2209b2bd09af0e9740c7fa18abe5b1e8a
3
+ size 27644829
CoreNLP/models/ru/tagger/russian-ud-mfmini.tagger ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c85cdd823a4c7ea7c62af4607b425b6a303f414d992a22c067d6be58f2a231df
3
+ size 22430426
CoreNLP/models/ru/tagger/russian-ud-pos.tagger ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57eb9e303d97d9c115b879deaa0ffa6e8b2519c8713bb040e5bd5827c8ee64de
3
+ size 9018233
CoreNLP/stanford-postagger-full-2020-11-17/LICENSE.txt ADDED
@@ -0,0 +1,339 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Preamble
10
+
11
+ The licenses for most software are designed to take away your
12
+ freedom to share and change it. By contrast, the GNU General Public
13
+ License is intended to guarantee your freedom to share and change free
14
+ software--to make sure the software is free for all its users. This
15
+ General Public License applies to most of the Free Software
16
+ Foundation's software and to any other program whose authors commit to
17
+ using it. (Some other Free Software Foundation software is covered by
18
+ the GNU Lesser General Public License instead.) You can apply it to
19
+ your programs, too.
20
+
21
+ When we speak of free software, we are referring to freedom, not
22
+ price. Our General Public Licenses are designed to make sure that you
23
+ have the freedom to distribute copies of free software (and charge for
24
+ this service if you wish), that you receive source code or can get it
25
+ if you want it, that you can change the software or use pieces of it
26
+ in new free programs; and that you know you can do these things.
27
+
28
+ To protect your rights, we need to make restrictions that forbid
29
+ anyone to deny you these rights or to ask you to surrender the rights.
30
+ These restrictions translate to certain responsibilities for you if you
31
+ distribute copies of the software, or if you modify it.
32
+
33
+ For example, if you distribute copies of such a program, whether
34
+ gratis or for a fee, you must give the recipients all the rights that
35
+ you have. You must make sure that they, too, receive or can get the
36
+ source code. And you must show them these terms so they know their
37
+ rights.
38
+
39
+ We protect your rights with two steps: (1) copyright the software, and
40
+ (2) offer you this license which gives you legal permission to copy,
41
+ distribute and/or modify the software.
42
+
43
+ Also, for each author's protection and ours, we want to make certain
44
+ that everyone understands that there is no warranty for this free
45
+ software. If the software is modified by someone else and passed on, we
46
+ want its recipients to know that what they have is not the original, so
47
+ that any problems introduced by others will not reflect on the original
48
+ authors' reputations.
49
+
50
+ Finally, any free program is threatened constantly by software
51
+ patents. We wish to avoid the danger that redistributors of a free
52
+ program will individually obtain patent licenses, in effect making the
53
+ program proprietary. To prevent this, we have made it clear that any
54
+ patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ GNU GENERAL PUBLIC LICENSE
60
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+ 0. This License applies to any program or other work which contains
63
+ a notice placed by the copyright holder saying it may be distributed
64
+ under the terms of this General Public License. The "Program", below,
65
+ refers to any such program or work, and a "work based on the Program"
66
+ means either the Program or any derivative work under copyright law:
67
+ that is to say, a work containing the Program or a portion of it,
68
+ either verbatim or with modifications and/or translated into another
69
+ language. (Hereinafter, translation is included without limitation in
70
+ the term "modification".) Each licensee is addressed as "you".
71
+
72
+ Activities other than copying, distribution and modification are not
73
+ covered by this License; they are outside its scope. The act of
74
+ running the Program is not restricted, and the output from the Program
75
+ is covered only if its contents constitute a work based on the
76
+ Program (independent of having been made by running the Program).
77
+ Whether that is true depends on what the Program does.
78
+
79
+ 1. You may copy and distribute verbatim copies of the Program's
80
+ source code as you receive it, in any medium, provided that you
81
+ conspicuously and appropriately publish on each copy an appropriate
82
+ copyright notice and disclaimer of warranty; keep intact all the
83
+ notices that refer to this License and to the absence of any warranty;
84
+ and give any other recipients of the Program a copy of this License
85
+ along with the Program.
86
+
87
+ You may charge a fee for the physical act of transferring a copy, and
88
+ you may at your option offer warranty protection in exchange for a fee.
89
+
90
+ 2. You may modify your copy or copies of the Program or any portion
91
+ of it, thus forming a work based on the Program, and copy and
92
+ distribute such modifications or work under the terms of Section 1
93
+ above, provided that you also meet all of these conditions:
94
+
95
+ a) You must cause the modified files to carry prominent notices
96
+ stating that you changed the files and the date of any change.
97
+
98
+ b) You must cause any work that you distribute or publish, that in
99
+ whole or in part contains or is derived from the Program or any
100
+ part thereof, to be licensed as a whole at no charge to all third
101
+ parties under the terms of this License.
102
+
103
+ c) If the modified program normally reads commands interactively
104
+ when run, you must cause it, when started running for such
105
+ interactive use in the most ordinary way, to print or display an
106
+ announcement including an appropriate copyright notice and a
107
+ notice that there is no warranty (or else, saying that you provide
108
+ a warranty) and that users may redistribute the program under
109
+ these conditions, and telling the user how to view a copy of this
110
+ License. (Exception: if the Program itself is interactive but
111
+ does not normally print such an announcement, your work based on
112
+ the Program is not required to print an announcement.)
113
+
114
+ These requirements apply to the modified work as a whole. If
115
+ identifiable sections of that work are not derived from the Program,
116
+ and can be reasonably considered independent and separate works in
117
+ themselves, then this License, and its terms, do not apply to those
118
+ sections when you distribute them as separate works. But when you
119
+ distribute the same sections as part of a whole which is a work based
120
+ on the Program, the distribution of the whole must be on the terms of
121
+ this License, whose permissions for other licensees extend to the
122
+ entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+ Thus, it is not the intent of this section to claim rights or contest
125
+ your rights to work written entirely by you; rather, the intent is to
126
+ exercise the right to control the distribution of derivative or
127
+ collective works based on the Program.
128
+
129
+ In addition, mere aggregation of another work not based on the Program
130
+ with the Program (or with a work based on the Program) on a volume of
131
+ a storage or distribution medium does not bring the other work under
132
+ the scope of this License.
133
+
134
+ 3. You may copy and distribute the Program (or a work based on it,
135
+ under Section 2) in object code or executable form under the terms of
136
+ Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+ a) Accompany it with the complete corresponding machine-readable
139
+ source code, which must be distributed under the terms of Sections
140
+ 1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+ b) Accompany it with a written offer, valid for at least three
143
+ years, to give any third party, for a charge no more than your
144
+ cost of physically performing source distribution, a complete
145
+ machine-readable copy of the corresponding source code, to be
146
+ distributed under the terms of Sections 1 and 2 above on a medium
147
+ customarily used for software interchange; or,
148
+
149
+ c) Accompany it with the information you received as to the offer
150
+ to distribute corresponding source code. (This alternative is
151
+ allowed only for noncommercial distribution and only if you
152
+ received the program in object code or executable form with such
153
+ an offer, in accord with Subsection b above.)
154
+
155
+ The source code for a work means the preferred form of the work for
156
+ making modifications to it. For an executable work, complete source
157
+ code means all the source code for all modules it contains, plus any
158
+ associated interface definition files, plus the scripts used to
159
+ control compilation and installation of the executable. However, as a
160
+ special exception, the source code distributed need not include
161
+ anything that is normally distributed (in either source or binary
162
+ form) with the major components (compiler, kernel, and so on) of the
163
+ operating system on which the executable runs, unless that component
164
+ itself accompanies the executable.
165
+
166
+ If distribution of executable or object code is made by offering
167
+ access to copy from a designated place, then offering equivalent
168
+ access to copy the source code from the same place counts as
169
+ distribution of the source code, even though third parties are not
170
+ compelled to copy the source along with the object code.
171
+
172
+ 4. You may not copy, modify, sublicense, or distribute the Program
173
+ except as expressly provided under this License. Any attempt
174
+ otherwise to copy, modify, sublicense or distribute the Program is
175
+ void, and will automatically terminate your rights under this License.
176
+ However, parties who have received copies, or rights, from you under
177
+ this License will not have their licenses terminated so long as such
178
+ parties remain in full compliance.
179
+
180
+ 5. You are not required to accept this License, since you have not
181
+ signed it. However, nothing else grants you permission to modify or
182
+ distribute the Program or its derivative works. These actions are
183
+ prohibited by law if you do not accept this License. Therefore, by
184
+ modifying or distributing the Program (or any work based on the
185
+ Program), you indicate your acceptance of this License to do so, and
186
+ all its terms and conditions for copying, distributing or modifying
187
+ the Program or works based on it.
188
+
189
+ 6. Each time you redistribute the Program (or any work based on the
190
+ Program), the recipient automatically receives a license from the
191
+ original licensor to copy, distribute or modify the Program subject to
192
+ these terms and conditions. You may not impose any further
193
+ restrictions on the recipients' exercise of the rights granted herein.
194
+ You are not responsible for enforcing compliance by third parties to
195
+ this License.
196
+
197
+ 7. If, as a consequence of a court judgment or allegation of patent
198
+ infringement or for any other reason (not limited to patent issues),
199
+ conditions are imposed on you (whether by court order, agreement or
200
+ otherwise) that contradict the conditions of this License, they do not
201
+ excuse you from the conditions of this License. If you cannot
202
+ distribute so as to satisfy simultaneously your obligations under this
203
+ License and any other pertinent obligations, then as a consequence you
204
+ may not distribute the Program at all. For example, if a patent
205
+ license would not permit royalty-free redistribution of the Program by
206
+ all those who receive copies directly or indirectly through you, then
207
+ the only way you could satisfy both it and this License would be to
208
+ refrain entirely from distribution of the Program.
209
+
210
+ If any portion of this section is held invalid or unenforceable under
211
+ any particular circumstance, the balance of the section is intended to
212
+ apply and the section as a whole is intended to apply in other
213
+ circumstances.
214
+
215
+ It is not the purpose of this section to induce you to infringe any
216
+ patents or other property right claims or to contest validity of any
217
+ such claims; this section has the sole purpose of protecting the
218
+ integrity of the free software distribution system, which is
219
+ implemented by public license practices. Many people have made
220
+ generous contributions to the wide range of software distributed
221
+ through that system in reliance on consistent application of that
222
+ system; it is up to the author/donor to decide if he or she is willing
223
+ to distribute software through any other system and a licensee cannot
224
+ impose that choice.
225
+
226
+ This section is intended to make thoroughly clear what is believed to
227
+ be a consequence of the rest of this License.
228
+
229
+ 8. If the distribution and/or use of the Program is restricted in
230
+ certain countries either by patents or by copyrighted interfaces, the
231
+ original copyright holder who places the Program under this License
232
+ may add an explicit geographical distribution limitation excluding
233
+ those countries, so that distribution is permitted only in or among
234
+ countries not thus excluded. In such case, this License incorporates
235
+ the limitation as if written in the body of this License.
236
+
237
+ 9. The Free Software Foundation may publish revised and/or new versions
238
+ of the General Public License from time to time. Such new versions will
239
+ be similar in spirit to the present version, but may differ in detail to
240
+ address new problems or concerns.
241
+
242
+ Each version is given a distinguishing version number. If the Program
243
+ specifies a version number of this License which applies to it and "any
244
+ later version", you have the option of following the terms and conditions
245
+ either of that version or of any later version published by the Free
246
+ Software Foundation. If the Program does not specify a version number of
247
+ this License, you may choose any version ever published by the Free Software
248
+ Foundation.
249
+
250
+ 10. If you wish to incorporate parts of the Program into other free
251
+ programs whose distribution conditions are different, write to the author
252
+ to ask for permission. For software which is copyrighted by the Free
253
+ Software Foundation, write to the Free Software Foundation; we sometimes
254
+ make exceptions for this. Our decision will be guided by the two goals
255
+ of preserving the free status of all derivatives of our free software and
256
+ of promoting the sharing and reuse of software generally.
257
+
258
+ NO WARRANTY
259
+
260
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+ REPAIR OR CORRECTION.
269
+
270
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+ POSSIBILITY OF SUCH DAMAGES.
279
+
280
+ END OF TERMS AND CONDITIONS
281
+
282
+ How to Apply These Terms to Your New Programs
283
+
284
+ If you develop a new program, and you want it to be of the greatest
285
+ possible use to the public, the best way to achieve this is to make it
286
+ free software which everyone can redistribute and change under these terms.
287
+
288
+ To do so, attach the following notices to the program. It is safest
289
+ to attach them to the start of each source file to most effectively
290
+ convey the exclusion of warranty; and each file should have at least
291
+ the "copyright" line and a pointer to where the full notice is found.
292
+
293
+ <one line to give the program's name and a brief idea of what it does.>
294
+ Copyright (C) <year> <name of author>
295
+
296
+ This program is free software; you can redistribute it and/or modify
297
+ it under the terms of the GNU General Public License as published by
298
+ the Free Software Foundation; either version 2 of the License, or
299
+ (at your option) any later version.
300
+
301
+ This program is distributed in the hope that it will be useful,
302
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
303
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304
+ GNU General Public License for more details.
305
+
306
+ You should have received a copy of the GNU General Public License along
307
+ with this program; if not, write to the Free Software Foundation, Inc.,
308
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
+
310
+ Also add information on how to contact you by electronic and paper mail.
311
+
312
+ If the program is interactive, make it output a short notice like this
313
+ when it starts in an interactive mode:
314
+
315
+ Gnomovision version 69, Copyright (C) year name of author
316
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
+ This is free software, and you are welcome to redistribute it
318
+ under certain conditions; type `show c' for details.
319
+
320
+ The hypothetical commands `show w' and `show c' should show the appropriate
321
+ parts of the General Public License. Of course, the commands you use may
322
+ be called something other than `show w' and `show c'; they could even be
323
+ mouse-clicks or menu items--whatever suits your program.
324
+
325
+ You should also get your employer (if you work as a programmer) or your
326
+ school, if any, to sign a "copyright disclaimer" for the program, if
327
+ necessary. Here is a sample; alter the names:
328
+
329
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
+
332
+ <signature of Ty Coon>, 1 April 1989
333
+ Ty Coon, President of Vice
334
+
335
+ This General Public License does not permit incorporating your program into
336
+ proprietary programs. If your program is a subroutine library, you may
337
+ consider it more useful to permit linking proprietary applications with the
338
+ library. If this is what you want to do, use the GNU Lesser General
339
+ Public License instead of this License.
CoreNLP/stanford-postagger-full-2020-11-17/README.txt ADDED
@@ -0,0 +1,315 @@
1
+ Stanford POS Tagger, v4.2.0 - 2020-11-17
2
+ Copyright (c) 2002-2020 The Board of Trustees of
3
+ The Leland Stanford Junior University. All Rights Reserved.
4
+
5
+ Original tagger author: Kristina Toutanova
6
+ Code contributions: Christopher Manning, Dan Klein, William Morgan,
7
+ Huihsin Tseng, Anna Rafferty, John Bauer
8
+ Major rewrite for version 2.0 by Michel Galley.
9
+ Current release prepared by: Jason Bolton
10
+
11
+ This package contains a Maximum Entropy part of speech tagger.
12
+
13
+ A Part-Of-Speech Tagger (POS Tagger) is a piece of software that reads
14
+ text in some language and assigns parts of speech to each word (and
15
+ other tokens), such as noun, verb, adjective, etc. Generally
16
+ computational applications use more fine-grained POS tags like
17
+ 'noun-plural'. This software is a Java implementation of the log-linear
18
+ part-of-speech (POS) taggers described in:
19
+
20
+ Kristina Toutanova and Christopher D. Manning. 2000. Enriching the
21
+ Knowledge Sources Used in a Maximum Entropy Part-of-Speech
22
+ Tagger. Proceedings of the Joint SIGDAT Conference on Empirical Methods
23
+ in Natural Language Processing and Very Large Corpora (EMNLP/VLC-2000),
24
+ Hong Kong.
25
+
26
+ Kristina Toutanova, Dan Klein, Christopher Manning, and Yoram
27
+ Singer. 2003. Feature-Rich Part-of-Speech Tagging with a Cyclic
28
+ Dependency Network. In Proceedings of HLT-NAACL 2003 pages 252-259.
29
+
30
+ The system requires Java 1.8+ to be installed. About 60 MB of memory is
31
+ required to run a trained tagger, depending on the OS, tagging model
32
+ chosen, etc. (i.e., you may need to give to java an option like java
33
+ -mx120m). Plenty of memory is needed to train a tagger. It depends on
34
+ the complexity of the model but at least 1GB is recommended (java
35
+ -mx1g). Two trained tagger models for English are included with the
36
+ tagger, along with some caseless versions, and we provide models for
37
+ some other languages. The tagger can be retrained on other languages
38
+ based on POS-annotated training text.
39
+
40
+
41
+
42
+ QUICKSTART
43
+ -----------------------------------------------
44
+
45
+ The Stanford POS Tagger is designed to be used from the command line or
46
+ programmatically via its API.
47
+
48
+ There is a GUI interface, but it is for
49
+ demonstration purposes only; most features of the tagger can only be
50
+ accessed via the command line. To run the demonstration GUI you should
51
+ be able to use any of the following 2 methods:
52
+
53
+ 1)
54
+ java -mx200m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI models/wsj-0-18-left3words-distsim.tagger
55
+
56
+ 2) Running the appropriate script for your operating system:
57
+ stanford-postagger-gui.bat
58
+ ./stanford-postagger-gui.sh
59
+
60
+ To run the tagger from the command line, you can start with the provided
61
+ script appropriate for your operating system:
62
+ ./stanford-postagger.sh models/wsj-0-18-left3words-distsim.tagger sample-input.txt
63
+ stanford-postagger models\wsj-0-18-left3words-distsim.tagger sample-input.txt
64
+ The output should match what is found in sample-output.txt
65
+
66
+ The tagger has three modes: tagging, training, and testing. Tagging
67
+ allows you to use a pretrained model (two English models are included)
68
+ to assign part of speech tags to unlabeled text. Training allows you to
69
+ save a new model based on a set of tagged data that you provide.
70
+ Testing allows you to see how well a tagger performs by tagging labeled
71
+ data and evaluating the results against the correct tags.
72
+
73
+ Many options are available for training, tagging, and testing. These
74
+ options can be set using a properties file. To start, you can generate a
75
+ default properties file by:
76
+
77
+ java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -genprops > myPropsFile.prop
78
+
79
+ This will create the file myPropsFile.prop with descriptions of each
80
+ option for the tagger and the default values for these options
81
+ specified. Any properties you can specify in a properties file can be
82
+ specified on the command line or vice versa. For further information,
83
+ please consult the Javadocs (start with the entry for MaxentTagger,
84
+ which includes a table of all options which may be set to configure the
85
+ tagger and descriptions of those options).
86
+
87
+
88
+ To tag a file using the pre-trained bidirectional model
89
+ =======================================================
90
+
91
+ java -mx300m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -model models/wsj-0-18-bidirectional-distsim.tagger -textFile sample-input.txt > sample-tagged.txt
92
+
93
+ Tagged output will be printed to standard out, which you can redirect
94
+ as above. Note that the bidirectional model is slightly more accurate
95
+ but significantly slower than the left3words model.
96
+
97
+ To train a simple model
98
+ =======================
99
+
100
+ java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -prop propertiesFile -model modelFile -trainFile trainingFile
101
+
102
+ To test a model
103
+ ===============
104
+
105
+ java -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -prop propertiesFile -model modelFile -testFile testFile
106
+
107
+ Using models for French, German, and Spanish
108
+ ===========================================
109
+
110
+ Starting with version 4.0.0, French, German, and Spanish are tokenized according to the UD 2.0 standard. This includes creating
111
+ multiword tokens. This functionality requires the pipeline functionality only available in the full Stanford CoreNLP distribution.
112
+ To tag French, German, or Spanish, one must provide UD 2.0 tokenized text, or upgrade to the full Stanford CoreNLP package to get
113
+ UD 2.0 tokenization for these languages.
114
+
115
+ To run on pretokenized text, add "-tokenize false" to your command.
116
+
117
+ Example:
118
+
119
+ java -mx300m -classpath stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -model models/french-ud.tagger -tokenize false -textFile sample-input.txt > sample-tagged.txt
120
+
121
+ CONTENTS
122
+ -----------------------------------------------
123
+ README.txt
124
+
125
+ This file.
126
+
127
+ LICENSE.txt
128
+
129
+ Stanford POS Tagger is licensed under the GNU General Public License (v2+).
130
+
131
+ stanford-postagger.jar
132
+ stanford-postagger-YYYY-MM-DD.jar
133
+
134
+ This is a JAR file containing all the Stanford classes necessary to
135
+ run the Stanford POS Tagger. The two jar files are identical. You can use
136
+ either the one with a version (date) indication or without, as you prefer.
137
+
138
+ src
139
+
140
+ A directory containing the Java 1.8 source code for the Stanford POS
141
+ Tagger distribution.
142
+
143
+ build.xml, Makefile
144
+
145
+ Files for building the distribution (with ant and make, respectively)
146
+
147
+ models
148
+
149
+ A directory containing trained POS taggers; the taggers end in ".tagger"
150
+ and the props file used to make the taggers end in ".props". The
151
+ ".props" files cannot be directly used on your own machine as they use
152
+ paths on the Stanford NLP machines, but they may serve as examples for
153
+ your own properties files. Included in the full version are other
154
+ English taggers, a German tagger, an Arabic tagger, and a Chinese
155
+ tagger. If you chose to download the smaller version of the tagger,
156
+ you have only two English taggers (left3words is faster but slightly
157
+ less accurate than bidirectional-distsim) - feel free to download any
158
+ other taggers you need from the POS tagger website. More information
159
+ about the models can be found in the README-Models.txt file in this
160
+ directory.
161
+
162
+ sample-input.txt
163
+
164
+ A sample text file that you can tag to demonstrate the tagger.
165
+
166
+ sample-output.txt
167
+
168
+ Tagged output of the tagger (using the left3words model)
169
+
170
+ stanford-postagger-gui.sh
171
+ stanford-postagger-gui.bat
172
+
173
+ Scripts for invoking the GUI demonstration version of the tagger.
174
+
175
+ stanford-postagger.sh
176
+ stanford-postagger.bat
177
+
178
+ Scripts for running the command-line version of the tagger.
179
+
180
+ javadoc
181
+
182
+ Javadocs for the distribution. In particular, look at the javadocs
183
+ for the class edu.stanford.nlp.tagger.maxent.MaxentTagger.
184
+
185
+ TaggerDemo.java
186
+
187
+ A sample file for how to call the tagger in your own program. You
188
+ should be able to compile and run it with:
189
+
190
+ javac -cp stanford-postagger.jar TaggerDemo.java
191
+ java -cp ".:stanford-postagger.jar" TaggerDemo models/wsj-0-18-left3words-distsim.tagger sample-input.txt
192
+
193
+ (If you are on Windows, you need to replace the ":" with a ";" in the
194
+ -cp argument, and should use a "\" in place of the "/" in the filename....)
195
+
196
+ THANKS
197
+ -----------------------------------------------
198
+
199
+ Thanks to the members of the Stanford Natural Language Processing Lab
200
+ for great collaborative work on Java libraries for natural language
201
+ processing.
202
+
203
+ http://nlp.stanford.edu/javanlp/
204
+
205
+ CHANGES
206
+ -----------------------------------------------
207
+
208
+ 2020-11-17 4.2.0 Add currency data for English models.
209
+
210
+ 2020-08-06 4.1.0 Add missing extractor, spanish tokenization
211
+ upgrades
212
+
213
+ 2020-05-22 4.0.0 Model tokenization updated to UDv2.0
214
+
215
+ 2018-10-16 3.9.2 New English models, better currency symbol
216
+ handling
217
+
218
+ 2018-02-27 3.9.1 new French UD model
219
+
220
+ 2017-06-09 3.8.0 new Spanish and French UD models
221
+
222
+ 2016-10-31 3.7.0 Update for compatibility, German UD model
223
+
224
+ 2015-12-09 3.6.0 Updated for compatibility
225
+
226
+ 2015-04-20 3.5.2 Update for compatibility
227
+
228
+ 2015-01-29 3.5.1 General bugfixes
229
+
230
+ 2014-10-26 3.5.0 Upgrade to Java 1.8
231
+
232
+ 2014-08-27 3.4.1 Add Spanish models
233
+
234
+ 2014-06-16 3.4 Using CC tagset for French
235
+
236
+ 2014-01-04 3.3.1 Bugfix release
237
+
238
+ 2013-11-12 3.3.0 Add imperatives to English training data
239
+
240
+ 2013-06-19 3.2.0 Decrease size and improve speed of tagger
241
+ models for all languages
242
+
243
+ 2013-04-04 3.1.5 Speed improvements, ctb7 model, -nthreads
244
+ option
245
+
246
+ 2012-11-11 3.1.4 Updated Chinese model
247
+
248
+ 2012-07-09 3.1.3 Minor bug fixes
249
+
250
+ 2012-05-22 3.1.2 Updated for compatibility with other releases
251
+
252
+ 2012-03-09 3.1.1 Caseless models added
253
+
254
+ 2012-01-06 3.1.0 French tagger added, tagging speed improved
255
+
256
+ 2011-09-14 3.0.4 Updated for compatibility with other releases
257
+
258
+ 2011-06-15 3.0.3 Updated for compatibility with other releases
259
+
260
+ 2011-05-15 3.0.2 Can read training files in TSV format
261
+
262
+ 2011-04-17 3.0.1 Improved German and Arabic models
263
+ Compatible with other Stanford releases
264
+
265
+ 2010-05-21 3.0.0 Re-entrant
266
+
267
+ LICENSE
268
+ -----------------------------------------------
269
+
270
+ Stanford POS Tagger
271
+ Copyright (c) 2002-2010 The Board of Trustees of
272
+ The Leland Stanford Junior University. All Rights Reserved.
273
+
274
+ This program is free software; you can redistribute it and/or
275
+ modify it under the terms of the GNU General Public License
276
+ as published by the Free Software Foundation; either version 2
277
+ of the License, or (at your option) any later version.
278
+
279
+ This program is distributed in the hope that it will be useful,
280
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
281
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
282
+ GNU General Public License for more details.
283
+
284
+ You should have received a copy of the GNU General Public License
285
+ along with this program. If not, see http://www.gnu.org/licenses/ .
286
+
287
+ For more information, bug reports, fixes, contact:
288
+ Christopher Manning
289
+ Dept of Computer Science, Gates 2A
290
+ Stanford CA 94305-9020
291
+ USA
292
+ Support/Questions: [email protected]
293
+ Licensing: [email protected]
294
+ http://nlp.stanford.edu/software/tagger.html
295
+
296
+
297
+ CONTACT
298
+ -----------------------------------------------
299
+
300
+ For questions about the Stanford POS tagger, please feel free to contact
301
+ the Stanford JavaNLP user community at the mailing list
302
+ [email protected]. You need to be a member of this
303
+ mailing list to be able to post to it. Join the list either by emailing
304
+ [email protected] (leave the subject and message
305
+ body empty) or by using the web interface at:
306
+
307
+ https://mailman.stanford.edu/mailman/listinfo/java-nlp-user
308
+
309
+ This is the best list to post to in order to ask questions, make
310
+ announcements, or for discussion among Stanford JavaNLP tool users. We
311
+ provide assistance on a best-effort basis. You can also look at the list
312
+ archives via https://mailman.stanford.edu/pipermail/java-nlp-user/. For
313
+ licensing questions, please see the tagger webpage or contact Stanford
314
+ JavaNLP at [email protected].
315
+
CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo.java ADDED
@@ -0,0 +1,32 @@
1
+ import edu.stanford.nlp.util.logging.Redwood;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.FileReader;
5
+ import java.util.List;
6
+
7
+ import edu.stanford.nlp.ling.SentenceUtils;
8
+ import edu.stanford.nlp.ling.TaggedWord;
9
+ import edu.stanford.nlp.ling.HasWord;
10
+ import edu.stanford.nlp.tagger.maxent.MaxentTagger;
11
+
12
+ public class TaggerDemo {
13
+
14
+ /** A logger for this class */
15
+ private static Redwood.RedwoodChannels log = Redwood.channels(TaggerDemo.class);
16
+
17
+ private TaggerDemo() {}
18
+
19
+ public static void main(String[] args) throws Exception {
20
+ if (args.length != 2) {
21
+ log.info("usage: java TaggerDemo modelFile fileToTag");
22
+ return;
23
+ }
24
+ MaxentTagger tagger = new MaxentTagger(args[0]);
25
+ List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(args[1])));
26
+ for (List<HasWord> sentence : sentences) {
27
+ List<TaggedWord> tSentence = tagger.tagSentence(sentence);
28
+ System.out.println(SentenceUtils.listToString(tSentence, false));
29
+ }
30
+ }
31
+
32
+ }
CoreNLP/stanford-postagger-full-2020-11-17/TaggerDemo2.java ADDED
@@ -0,0 +1,62 @@
1
+ import edu.stanford.nlp.util.logging.Redwood;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.FileInputStream;
5
+ import java.io.InputStreamReader;
6
+ import java.io.OutputStreamWriter;
7
+ import java.io.PrintWriter;
8
+ import java.util.List;
9
+
10
+ import edu.stanford.nlp.ling.SentenceUtils;
11
+ import edu.stanford.nlp.ling.TaggedWord;
12
+ import edu.stanford.nlp.ling.HasWord;
13
+ import edu.stanford.nlp.ling.CoreLabel;
14
+ import edu.stanford.nlp.process.CoreLabelTokenFactory;
15
+ import edu.stanford.nlp.process.DocumentPreprocessor;
16
+ import edu.stanford.nlp.process.PTBTokenizer;
17
+ import edu.stanford.nlp.process.TokenizerFactory;
18
+ import edu.stanford.nlp.tagger.maxent.MaxentTagger;
19
+
20
+ /** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
21
+ * being tagged by the tagger. The sentences are generated by direct use
22
+ * of the DocumentPreprocessor class.
23
+ *
24
+ * @author Christopher Manning
25
+ */
26
+ public class TaggerDemo2 {
27
+
28
+ /** A logger for this class */
29
+ private static Redwood.RedwoodChannels log = Redwood.channels(TaggerDemo2.class);
30
+
31
+ private TaggerDemo2() {}
32
+
33
+ public static void main(String[] args) throws Exception {
34
+ if (args.length != 2) {
35
+ log.info("usage: java TaggerDemo2 modelFile fileToTag");
36
+ return;
37
+ }
38
+ MaxentTagger tagger = new MaxentTagger(args[0]);
39
+ TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
40
+ "untokenizable=noneKeep");
41
+ BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
42
+ PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
43
+ DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
44
+ documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
45
+ for (List<HasWord> sentence : documentPreprocessor) {
46
+ List<TaggedWord> tSentence = tagger.tagSentence(sentence);
47
+ pw.println(SentenceUtils.listToString(tSentence, false));
48
+ }
49
+
50
+ // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
51
+ List<HasWord> sent = SentenceUtils.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
52
+ List<TaggedWord> taggedSent = tagger.tagSentence(sent);
53
+ for (TaggedWord tw : taggedSent) {
54
+ if (tw.tag().startsWith("JJ")) {
55
+ pw.println(tw.word());
56
+ }
57
+ }
58
+
59
+ pw.close();
60
+ }
61
+
62
+ }
CoreNLP/stanford-postagger-full-2020-11-17/build.xml ADDED
@@ -0,0 +1,201 @@
1
+ <!-- build.xml file for ant for JavaNLP -->
2
+
3
+ <!-- Before using this, unjar the sources' jar file into the src/ directory! -->
4
+
5
+ <!-- A "project" describes a set of targets that may be requested
6
+ when Ant is executed. The "default" attribute defines the
7
+ target which is executed if no specific target is requested,
8
+ and the "basedir" attribute defines the current working directory
9
+ from which Ant executes the requested task. This is normally
10
+ set to the current working directory.
11
+ -->
12
+
13
+ <project name="JavaNLP" default="compile" basedir=".">
14
+
15
+ <property name="build.home" value="${basedir}/classes"/>
16
+ <property name="build.tests" value="${basedir}/classes"/>
17
+ <property name="docs.home" value="${basedir}/docs"/>
18
+ <property name="src.home" value="${basedir}/src"/>
19
+ <property name="javadoc.home" value="${basedir}/javadoc"/>
20
+
21
+
22
+ <!-- ==================== Compilation Control Options ==================== -->
23
+
24
+ <!--
25
+
26
+ These properties control option settings on the Javac compiler when it
27
+ is invoked using the <javac> task.
28
+
29
+ compile.debug Should compilation include the debug option?
30
+
31
+ compile.deprecation Should compilation include the deprecation option?
32
+
33
+ compile.optimize Should compilation include the optimize option?
34
+
35
+ compile.source Source version compatibility
36
+
37
+ compile.target Target class version compatibility
38
+
39
+ -->
40
+
41
+ <property name="compile.debug" value="true"/>
42
+ <property name="compile.deprecation" value="false"/>
43
+ <property name="compile.optimize" value="true"/>
44
+ <property name="compile.source" value="1.8" />
45
+ <property name="compile.target" value="1.8" />
46
+ <property name="compile.encoding" value="utf-8" />
47
+
48
+
49
+
50
+
51
+ <!-- ==================== All Target ====================================== -->
52
+
53
+ <!--
54
+
55
+ The "all" target is a shortcut for running the "clean" target followed
56
+ by the "compile" target, to force a complete recompile.
57
+
58
+ -->
59
+
60
+ <target name="all" depends="clean,compile"
61
+ description="Clean build and dist directories, then compile"/>
62
+
63
+
64
+
65
+ <!-- ==================== Clean Target ==================================== -->
66
+
67
+ <!--
68
+
69
+ The "clean" target deletes any previous "build" and "dist" directory,
70
+ so that you can be ensured the application can be built from scratch.
71
+
72
+ -->
73
+
74
+ <target name="clean" description="Delete old classes">
75
+ <delete dir="${build.home}/edu"/>
76
+ </target>
77
+
78
+
79
+
80
+ <!-- ==================== Compile Target ================================== -->
81
+
82
+ <!--
83
+
84
+ The "compile" target transforms source files (from your "src" directory)
85
+ into object files in the appropriate location in the build directory.
86
+ This example assumes that you will be including your classes in an
87
+ unpacked directory hierarchy under "/WEB-INF/classes".
88
+
89
+ -->
90
+
91
+ <target name="compile" depends="prepare"
92
+ description="Compile Java sources">
93
+
94
+ <!-- Compile Java classes as necessary -->
95
+ <mkdir dir="${build.home}"/>
96
+ <javac srcdir="${src.home}"
97
+ destdir="${build.home}"
98
+ debug="${compile.debug}"
99
+ encoding="${compile.encoding}"
100
+ deprecation="${compile.deprecation}"
101
+ optimize="${compile.optimize}"
102
+ source="${compile.source}"
103
+ target="${compile.target}"
104
+ includeantruntime="false">
105
+ <compilerarg value="-Xmaxerrs"/>
106
+ <compilerarg value="20"/>
107
+ <!-- <compilerarg value="-Xlint"/> -->
108
+ </javac>
109
+
110
+ <!-- Copy application resources -->
111
+ <!--
112
+ <copy todir="${build.home}/WEB-INF/classes">
113
+ <fileset dir="${src.home}" excludes="**/*.java"/>
114
+ </copy>
115
+ -->
116
+
117
+ </target>
118
+
119
+
120
+ <!-- ==================== Jar Target ================================== -->
121
+
122
+ <!--
123
+ The "jar" target recreates the jar file, which you may want to do if
124
+ you take apart the source jar and change something.
125
+ -->
126
+
127
+ <property name="jar.output" value="stanford-postagger.jar" />
128
+ <property name="jar.mainclass" value="edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI" />
129
+
130
+ <target name="jar" depends="compile" description="Build a jar file">
131
+ <jar destfile="${jar.output}">
132
+ <fileset dir="${build.home}"/>
133
+ <manifest>
134
+ <attribute name="Main-Class" value="${jar.mainclass}"/>
135
+ </manifest>
136
+ </jar>
137
+ </target>
138
+
139
+ <!-- ==================== Javadoc Target ================================== -->
140
+
141
+ <!--
142
+
143
+ The "javadoc" target creates Javadoc API documentation for the Java
144
+ classes included in your application. Normally, this is only required
145
+ when preparing a distribution release, but is available as a separate
146
+ target in case the developer wants to create Javadocs independently.
147
+
148
+ -->
149
+
150
+ <target name="javadoc" depends="compile"
151
+ description="Create Javadoc API documentation">
152
+
153
+ <mkdir dir="${javadoc.home}"/>
154
+ <javadoc sourcepath="${src.home}"
155
+ destdir="${javadoc.home}"
156
+ maxmemory="1g"
157
+ author="true"
158
+ source="${compile.source}"
159
+ overview="${src.home}/edu/stanford/nlp/overview.html"
160
+ doctitle="Stanford JavaNLP API Documentation"
161
+ windowtitle="Stanford JavaNLP API"
162
+ encoding="${compile.encoding}"
163
+ docencoding="${compile.encoding}"
164
+ charset="${compile.encoding}"
165
+ packagenames="*">
166
+ <!-- Allow @generated, @modifiable and @ordered tags -->
167
+ <tag name="generated" scope="all" description="Generated" />
168
+ <tag name="modifiable" scope="all" description="Modifiable" />
169
+ <tag name="ordered" scope="all" description="Ordered" />
170
+ <!-- Depends on lib and classes folders -->
171
+ <classpath>
172
+ <pathelement path="${build.home}" />
173
+ </classpath>
174
+ <bottom><![CDATA[<font size="2"><a href="https://nlp.stanford.edu" target="_top">Stanford NLP Group</a></font>]]></bottom>
175
+ <link href="https://docs.oracle.com/javase/8/docs/api/"/>
176
+ </javadoc>
177
+
178
+ </target>
179
+
180
+
181
+ <!-- ==================== Prepare Target ================================== -->
182
+
183
+ <!--
184
+
185
+ The "prepare" target is used to create the "build" destination directory,
186
+ and copy the static contents of your web application to it. If you need
187
+ to copy static files from external dependencies, you can customize the
188
+ contents of this task.
189
+
190
+ Normally, this task is executed indirectly when needed.
191
+
192
+ -->
193
+
194
+ <target name="prepare">
195
+
196
+ <!-- Create build directories as needed -->
197
+ <mkdir dir="${build.home}"/>
198
+
199
+ </target>
200
+
201
+ </project>
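For reference, the targets defined above are driven with Apache Ant in the usual way. A minimal sketch, assuming Ant is installed and that the sources jar has first been unpacked into src/ as the comment at the top of the file requires:

    # unpack the sources, then rebuild the classes and the jar
    mkdir -p src && (cd src && jar xf ../stanford-postagger-4.2.0-sources.jar)
    ant compile    # compiles into classes/ (the "prepare" target runs first)
    ant jar        # rebuilds stanford-postagger.jar with MaxentTaggerGUI as Main-Class
    ant javadoc    # optional: generates API docs into javadoc/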
CoreNLP/stanford-postagger-full-2020-11-17/data/enclitic-inflections.data ADDED
The diff for this file is too large to render.
 
CoreNLP/stanford-postagger-full-2020-11-17/models/README-Models.txt ADDED
@@ -0,0 +1,68 @@
+ Stanford POS Tagger, v4.2.0 - 2020-11-17
+ Copyright (c) 2002-2020 The Board of Trustees of
+ The Leland Stanford Junior University. All Rights Reserved.
+
+ This document contains (some) information about the models included in
+ this release and that may be downloaded from the POS tagger website at
+ http://nlp.stanford.edu/software/tagger.html . All of the models mentioned
+ in this document are in the downloaded package in the same directory as this
+ readme. All taggers are accompanied by the props files used to create
+ them; please examine these files for more detailed information about the
+ creation of the taggers.
+
+ For English, the bidirectional taggers are slightly more accurate, but
+ tag much more slowly; choose the appropriate tagger based on your
+ speed/performance needs.
+
+ English taggers
+ ---------------------------
+ english-left3words-distsim.tagger
+ Trained on WSJ sections 0-18 and extra parser training data using the
+ left3words architecture and includes word shape and distributional
+ similarity features. Penn tagset. UDv2.0 tokenization standard.
+
+ english-bidirectional-distsim.tagger
+ Trained on WSJ sections 0-18 using a bidirectional architecture and
+ including word shape and distributional similarity features.
+ Penn Treebank tagset. UDv2.0 tokenization standard.
+
+ english-caseless-left3words-distsim.tagger
+ Trained on WSJ sections 0-18 and extra parser training data using the
+ left3words architecture and includes word shape and distributional
+ similarity features. Penn tagset. Ignores case. UDv2.0 tokenization
+ standard.
+
+
+ Chinese tagger
+ ---------------------------
+ chinese-nodistsim.tagger
+ Trained on a combination of CTB7 texts from Chinese and Hong Kong
+ sources.
+ LDC Chinese Treebank POS tag set.
+
+ chinese-distsim.tagger
+ Trained on a combination of CTB7 texts from Chinese and Hong Kong
+ sources with distributional similarity clusters.
+ LDC Chinese Treebank POS tag set.
+
+ Arabic tagger
+ ---------------------------
+ arabic.tagger
+ Trained on the *entire* ATB p1-3.
+ When trained on the train part of the ATB p1-3 split done for the 2005
+ JHU Summer Workshop (Diab split), using (augmented) Bies tags, it gets
+
+ French tagger
+ ---------------------------
+ french-ud.tagger
+ Trained on the French GSD (UDv2.2) data set
+
+ German tagger
+ ---------------------------
+ german-ud.tagger
+ Trained on the German GSD (UDv2.2) data set
+
+ Spanish tagger
+ --------------------------
+ spanish-ud.tagger
+ Trained on the Spanish AnCora (UDv2.0) data set
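As a concrete illustration of using one of the models listed above, the following command tags the bundled sample text from the command line; it mirrors the stanford-postagger.sh script included later in this commit (swap in any other model file from this directory as needed):

    java -mx300m -cp 'stanford-postagger.jar:' edu.stanford.nlp.tagger.maxent.MaxentTagger \
      -model models/english-left3words-distsim.tagger -textFile sample-input.txt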
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0ea63d5a2b78cc06565accfaac5abdd64b031a422d0e09e189594098671e5e6
+ size 2577597
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic-train.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Wed Jan 03 12:02:30 PST 2018 with arguments:
+ model = arabic-train.tagger
+ arch = words(-2,2),order(1),prefix(6),suffix(6),unicodeshapes(1)
+ wordFunction =
+ trainFile = format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Train.utf8.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 1
+ debug = false
+ debugPrefix =
+ tagSeparator = /
+ encoding = UTF-8
+ iterations = 100
+ lang = arabic
+ learnClosedClassTags = false
+ minFeatureThresh = 3
+ openClassTags =
+ rareWordMinFeatureThresh = 3
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = false
+ tokenizerFactory = edu.stanford.nlp.process.WhitespaceTokenizer
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
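Each .props file in this directory records the exact configuration used to train the corresponding model. As a hedged sketch, a model can be retrained from such a file along the lines below, assuming the Stanford-internal /u/nlp/... training paths inside the file are first replaced with data available locally and the heap size is adjusted to the corpus (see the package README.txt for the authoritative training instructions):

    # retrain a tagger from a properties file (training data paths must be edited first)
    java -mx4g -cp stanford-postagger.jar edu.stanford.nlp.tagger.maxent.MaxentTagger -props arabic-train.tagger.props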
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d07351166e384349a0e1a5d1a8b2987f88a5dcbad9bfb44e24444028802dbd41
+ size 2944508
CoreNLP/stanford-postagger-full-2020-11-17/models/arabic.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Wed Jan 03 12:24:48 PST 2018 with arguments:
+ model = arabic.tagger
+ arch = words(-2,2),order(1),prefix(6),suffix(6),unicodeshapes(1)
+ wordFunction =
+ trainFile = format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Train.utf8.txt;format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Dev.utf8.txt;format=TREES,treeNormalizer=edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer,trf=edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Test.utf8.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 1
+ debug = false
+ debugPrefix =
+ tagSeparator = /
+ encoding = UTF-8
+ iterations = 100
+ lang = arabic
+ learnClosedClassTags = false
+ minFeatureThresh = 3
+ openClassTags =
+ rareWordMinFeatureThresh = 3
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = false
+ tokenizerFactory = edu.stanford.nlp.process.WhitespaceTokenizer
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca40fdab701679ac93e9f075da134e2906f3d1a88b084bc0c018506fa7a68e4b
+ size 13866197
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-distsim.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Fri Feb 14 01:19:49 PST 2014 with arguments:
+ model = chinese-distsim.tagger
+ arch = generic,suffix(4),prefix(4),unicodeshapes(-1,1),unicodeshapeconjunction(-1,1),words(-2,-2),words(2,2),distsim(/u/nlp/data/chinese/distsim/xin_cmn_2000-2010.ldc.seg.utf8.1M.random-c1000,-1,1),distsimconjunction(/u/nlp/data/chinese/distsim/xin_cmn_2000-2010.ldc.seg.utf8.1M.random-c1000,-1,1)
+ wordFunction = edu.stanford.nlp.util.UTF8EquivalenceFunction
+ trainFile = format=TREES,/u/nlp/data/chinese/ctb7/train.mrg
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 1
+ debug = false
+ debugPrefix =
+ tagSeparator = #
+ encoding = utf-8
+ iterations = 100
+ lang = chinese
+ learnClosedClassTags = false
+ minFeatureThresh = 3
+ openClassTags =
+ rareWordMinFeatureThresh = 3
+ rareWordThresh = 20
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = false
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db4178289b565a439f4bd2f2216d770e37f8ae568dcc34b1278c1489d812a8ca
+ size 3963731
CoreNLP/stanford-postagger-full-2020-11-17/models/chinese-nodistsim.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Fri Feb 14 02:20:03 PST 2014 with arguments:
+ model = chinese-nodistsim.tagger
+ arch = generic,suffix(4),prefix(4),unicodeshapes(-1,1),unicodeshapeconjunction(-1,1),words(-2,-2),words(2,2)
+ wordFunction = edu.stanford.nlp.util.UTF8EquivalenceFunction
+ trainFile = format=TREES,/u/nlp/data/chinese/ctb7/train.mrg
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 1
+ debug = false
+ debugPrefix =
+ tagSeparator = #
+ encoding = utf-8
+ iterations = 100
+ lang = chinese
+ learnClosedClassTags = false
+ minFeatureThresh = 3
+ openClassTags =
+ rareWordMinFeatureThresh = 3
+ rareWordThresh = 20
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = false
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a363a88c67b25e793a9382254485d897e95b0e166cc34af314ec3d53d79899b1
+ size 20045086
CoreNLP/stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger.props ADDED
@@ -0,0 +1,35 @@
+ model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-bidirectional-distsim-prod1.tagger
+ arch = bidirectional5words,allwordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUCase),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CompanyNameDetector),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorAllCapitalized),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUpperDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorStartSentenceCap),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCapC),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCap),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
+ trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/handparsed-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-bidirectional-distsim-prod1/train/train-tech-english.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = UTF-8
+ iterations = 100
+ lang = english
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 5
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.5
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
+ minWordsLockTags = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f57ba3bca617dc8732b7d1c66f16a0f147cccbc19315938c0e03a6d4569c0488
+ size 13937235
CoreNLP/stanford-postagger-full-2020-11-17/models/english-caseless-left3words-distsim.tagger.props ADDED
@@ -0,0 +1,36 @@
+ ## tagger training invoked at Sat Feb 08 23:44:06 PST 2020 with arguments:
+ model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-caseless-left3words-distsim-prod2.tagger
+ arch = left3words,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CaselessCompanyNameDetector),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
+ trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/train-tech-english.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-caseless-left3words-distsim-prod2/train/handparsed-train.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = UTF-8
+ iterations = 100
+ lang = english
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 5
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
+ minWordsLockTags = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ebb5f7454da95775ecdb3ee20d3c58488cd87aa9999585951645f949e962089f
+ size 15198877
CoreNLP/stanford-postagger-full-2020-11-17/models/english-left3words-distsim.tagger.props ADDED
@@ -0,0 +1,35 @@
+ model = /u/nlp/data/pos-tagger/models-4.0.0/models/english-left3words-distsim-prod1.tagger
+ arch = left3words,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUCase),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorCNumber),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorLetterDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.CompanyNameDetector),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorAllCapitalized),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorUpperDigitDash),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorStartSentenceCap),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCapC),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorMidSentenceCap),prefix(10),suffix(10),unicodeshapes(0),rareExtractor(edu.stanford.nlp.tagger.maxent.ExtractorNonAlphanumeric)
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
+ trainFile = /u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/craft-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/ewt-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/questionbank-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/train-currency.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/handparsed-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/ontonotes-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/wsj-train.txt;/u/nlp/data/pos-tagger/models-4.0.0/data/experiments/english-left3words-distsim-prod1/train/train-tech-english.txt
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = UTF-8
+ iterations = 100
+ lang = english
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 5
+ rareWordThresh = 5
+ search = owlqn
+ sgml = false
+ sigmaSquared = 0.5
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions =
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput =
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
+ minWordsLockTags = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b71ac1193d406f368d0525edc2ab295e7138c184c06a27f6363caa88429d4526
+ size 1591008
CoreNLP/stanford-postagger-full-2020-11-17/models/french-ud.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Mon Jul 01 00:54:56 PDT 2019 with arguments:
+ model = french-ud.tagger
+ arch = left3words,naacl2003unknowns,unicodeshapes(-1,1)
+ wordFunction =
+ trainFile = format=TSV,wordColumn=1,tagColumn=3,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/fr_gsd-ud-train.conllu.clean
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = utf-8
+ iterations = 100
+ lang = french
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 10
+ rareWordThresh = 5
+ search = owlqn2
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions = asciiQuotes
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1537adaf468f4005882207c88fb3a7eda93d82764b24d1de7900776131fc8878
+ size 71524450
CoreNLP/stanford-postagger-full-2020-11-17/models/german-ud.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Mon Jul 01 01:05:00 PDT 2019 with arguments:
+ model = german-ud.tagger
+ arch = left3words,naacl2003unknowns,unicodeshapes(-2,2),distsim(/u/nlp/data/german/ner/hgc_175m_600,-1,1),distsimconjunction(/u/nlp/data/german/ner/hgc_175m_600,-1,1),unicodeshapeconjunction(-1,1)
+ wordFunction =
+ trainFile = format=TSV,wordColumn=1,tagColumn=3,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/de_gsd-ud-train.conllu.clean
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = utf-8
+ iterations = 100
+ lang = german
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 10
+ rareWordThresh = 5
+ search = owlqn2
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.625
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions = asciiQuotes
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 1
CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5dba9b09f6cb0e0f58ee0d694ad5c920dec3a08c89c952c64fa52b67011e7e5d
+ size 9443457
CoreNLP/stanford-postagger-full-2020-11-17/models/spanish-ud.tagger.props ADDED
@@ -0,0 +1,35 @@
+ ## tagger training invoked at Mon Jul 01 01:04:01 PDT 2019 with arguments:
+ model = spanish-ud.tagger
+ arch = left3words,naacl2003unknowns,allwordshapes(-1,1),distsim(/u/nlp/data/spanish/distsim/spanish.spence512.cls,-1,1),distsimconjunction(/u/nlp/data/spanish/distsim/spanish.spence512.cls,-1,1)
+ wordFunction =
+ trainFile = format=TSV,wordColumn=1,tagColumn=4,/u/nlp/data/depparser/nn/models-4.0.0/data/clean/es_ancora-ud-train.conllu.clean
+ closedClassTags =
+ closedClassTagThreshold = 40
+ curWordMinFeatureThresh = 2
+ debug = false
+ debugPrefix =
+ tagSeparator = _
+ encoding = utf-8
+ iterations = 100
+ lang = spanish
+ learnClosedClassTags = false
+ minFeatureThresh = 2
+ openClassTags =
+ rareWordMinFeatureThresh = 10
+ rareWordThresh = 5
+ search = owlqn2
+ sgml = false
+ sigmaSquared = 0.0
+ regL1 = 0.75
+ tagInside =
+ tokenize = true
+ tokenizerFactory =
+ tokenizerOptions = asciiQuotes
+ verbose = false
+ verboseResults = true
+ veryCommonWordThresh = 250
+ xmlInput = null
+ outputFile =
+ outputFormat = slashTags
+ outputFormatOptions =
+ nthreads = 16
CoreNLP/stanford-postagger-full-2020-11-17/sample-input.txt ADDED
@@ -0,0 +1,6 @@
+ A passenger plane has crashed shortly after take-off from Kyrgyzstan's
+ capital, Bishkek, killing a large number of those on board. The head of
+ Kyrgyzstan's civil aviation authority said that out of about 90
+ passengers and crew, only about 20 people have survived. The Itek Air
+ Boeing 737 took off bound for Mashhad, in north-eastern Iran, but turned
+ round some 10 minutes later.
CoreNLP/stanford-postagger-full-2020-11-17/sample-output.txt ADDED
@@ -0,0 +1,3 @@
+ A_DT passenger_NN plane_NN has_VBZ crashed_VBN shortly_RB after_IN take-off_NN from_IN Kyrgyzstan_NNP 's_POS capital_NN ,_, Bishkek_NNP ,_, killing_VBG a_DT large_JJ number_NN of_IN those_DT on_IN board_NN ._.
+ The_DT head_NN of_IN Kyrgyzstan_NNP 's_POS civil_JJ aviation_NN authority_NN said_VBD that_IN out_IN of_IN about_IN 90_CD passengers_NNS and_CC crew_NN ,_, only_RB about_IN 20_CD people_NNS have_VBP survived_VBN ._.
+ The_DT Itek_NNP Air_NNP Boeing_NNP 737_CD took_VBD off_RP bound_VBN for_IN Mashhad_NNP ,_, in_IN north-eastern_JJ Iran_NNP ,_, but_CC turned_VBD round_NN some_DT 10_CD minutes_NNS later_RB ._.
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-javadoc.jar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:197a84a195a7fbdef461144b07f8b7475b7145f3643a45f5ead295367fe6d323
+ size 4429314
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0-sources.jar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e7f56b55bd3ec2a5236c7a57b28923bce6a2d72d9faa5ab6fa98309ff5f25e4
+ size 2919886
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-4.2.0.jar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6090106c57da13d2ac8a1b2798dd7f437e07a9909a00f917e884bf6fa52fc8d
+ size 3650039
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.bat ADDED
@@ -0,0 +1,3 @@
+ :: runs the POS tagger (toy) GUI
+ :: usage stanford-postagger-gui
+ java -mx200m -cp "stanford-postagger.jar;" edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger-gui.sh ADDED
@@ -0,0 +1,2 @@
+ #!/bin/sh
+ java -mx200m -cp 'stanford-postagger.jar:' edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.bat ADDED
@@ -0,0 +1,4 @@
+ :: usage: stanford-postagger model textFile
+ :: e.g., stanford-postagger models\english-left3words-distsim.tagger sample-input.txt
+
+ java -mx300m -cp "stanford-postagger.jar;" edu.stanford.nlp.tagger.maxent.MaxentTagger -model %1 -textFile %2
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.jar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6090106c57da13d2ac8a1b2798dd7f437e07a9909a00f917e884bf6fa52fc8d
+ size 3650039
CoreNLP/stanford-postagger-full-2020-11-17/stanford-postagger.sh ADDED
@@ -0,0 +1,6 @@
+ #!/bin/sh
+ #
+ # usage: ./stanford-postagger.sh model textFile
+ # e.g., ./stanford-postagger.sh models/english-left3words-distsim.tagger sample-input.txt
+
+ java -mx300m -cp 'stanford-postagger.jar:' edu.stanford.nlp.tagger.maxent.MaxentTagger -model $1 -textFile $2
UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll ADDED
Binary file (76.8 kB).
 
UDPipe/v1/CorpusExplorer/udpipe_addon/CorpusExplorer.Sdk.Extern.UdPipe.dll.config ADDED
@@ -0,0 +1,83 @@
+ <?xml version="1.0" encoding="utf-8"?>
+ <configuration>
+ <runtime>
+ <assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
+ <dependentAssembly>
+ <assemblyIdentity name="System.Diagnostics.DiagnosticSource" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.3.1" newVersion="4.0.3.1" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Polenter.SharpSerializer" publicKeyToken="8f4f20011571ee5f" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-3.0.1.0" newVersion="3.0.1.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Telerik.Windows.Documents.Flow" publicKeyToken="5803cfa389c90ce7" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-2018.2.511.40" newVersion="2018.2.511.40" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Telerik.Windows.Documents.Core" publicKeyToken="5803cfa389c90ce7" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-2018.2.511.40" newVersion="2018.2.511.40" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Newtonsoft.Json" publicKeyToken="30ad4fe6b2a6aeed" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-13.0.0.0" newVersion="13.0.0.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="PostSharp" publicKeyToken="b13fd38b8f9c99d7" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="PostSharp.Patterns.Aggregation" publicKeyToken="e7f631e6ce13f078" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="PostSharp.Patterns.Common" publicKeyToken="e7f631e6ce13f078" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-6.0.27.0" newVersion="6.0.27.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Runtime.CompilerServices.Unsafe" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-6.0.0.0" newVersion="6.0.0.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="K4os.Hash.xxHash" publicKeyToken="32cd54395057cec3" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-1.0.8.0" newVersion="1.0.8.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Memory" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.1.2" newVersion="4.0.1.2" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Net.Http" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.1.0.3" newVersion="4.1.0.3" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Buffers" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.3.0" newVersion="4.0.3.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.IO.Pipelines" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-8.0.0.0" newVersion="8.0.0.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Microsoft.Bcl.AsyncInterfaces" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-8.0.0.0" newVersion="8.0.0.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Security.Cryptography.X509Certificates" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.1.1.2" newVersion="4.1.1.2" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="Microsoft.Win32.Primitives" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.1.0" newVersion="4.0.1.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.Security.Cryptography.Encoding" publicKeyToken="b03f5f7f11d50a3a" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.0.1.0" newVersion="4.0.1.0" />
+ </dependentAssembly>
+ <dependentAssembly>
+ <assemblyIdentity name="System.IO.Compression" publicKeyToken="b77a5c561934e089" culture="neutral" />
+ <bindingRedirect oldVersion="0.0.0.0-4.1.2.0" newVersion="4.1.2.0" />
+ </dependentAssembly>
+ </assemblyBinding>
+ </runtime>
+ <startup><supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" /></startup></configuration>
UDPipe/v1/XDependencies/UDPipe/AUTHORS ADDED
@@ -0,0 +1,2 @@
+ Milan Straka <[email protected]>
+ Jana Straková <[email protected]>