Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +566 -0
- config.json +45 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,566 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- dense
|
7 |
+
- generated_from_trainer
|
8 |
+
- dataset_size:4524
|
9 |
+
- loss:MultipleNegativesRankingLoss
|
10 |
+
base_model: nomic-ai/modernbert-embed-base
|
11 |
+
widget:
|
12 |
+
- source_sentence: The Bell Captain orchestrates a professional and memorable service
|
13 |
+
experience for all guests and customers. Apart from providing bell services to
|
14 |
+
guests, addressing their concerns and feedback and recommending them tourism products
|
15 |
+
and services, he/she also performs the role of a team leader, allocating work
|
16 |
+
duties, coordinating and supervising bell service operations to ensure service
|
17 |
+
standards are met or exceeded. As a key service ambassador, he oversees the grooming
|
18 |
+
of the bell staff and acts as a role model to drive service culture within the
|
19 |
+
team. To ensure workplace safety and the security of the property, the Bell Captain
|
20 |
+
monitors the bell team's compliance with organisational and regulatory requirements
|
21 |
+
as they carry out their duties and keeps an eye on lobby activities to identify
|
22 |
+
security threats. He also assists in the execution of crisis management plans
|
23 |
+
during emergency situations. As a senior member of the team, he trains and guides
|
24 |
+
team members on their work performance. He is a service-oriented individual who
|
25 |
+
possesses excellent guest relations and communication skills to interact with
|
26 |
+
guests from diverse cultural backgrounds. He is physically fit to lift heavy pieces
|
27 |
+
of luggage and to stand or walk for an extended period of time or for an entire
|
28 |
+
work shift. He works shifts, including weekends and public holidays.
|
29 |
+
sentences:
|
30 |
+
- Senior User Experience Designer leading projects to create compelling user interactions,
|
31 |
+
conducting research to identify innovative design solutions, and responsible for
|
32 |
+
all UX design choices within the company. The goal is to improve customer engagement
|
33 |
+
and loyalty while developing strategies to address potential risks in implementing
|
34 |
+
new design solutions. This role involves forging strategic partnerships to fulfill
|
35 |
+
organizational objectives and providing expert guidance to enhance team members'
|
36 |
+
skills. Exceptional leadership and communication abilities are essential to inspire
|
37 |
+
stakeholders in realizing the best user experience and securing their support
|
38 |
+
for proposed initiatives, along with staying current with industry trends and
|
39 |
+
demonstrating strong negotiation and problem-solving skills.
|
40 |
+
- Bell Captain responsible for delivering exceptional service experiences to guests,
|
41 |
+
managing bell services, addressing guest feedback, and recommending local attractions.
|
42 |
+
This role includes leading the bell team, supervising operations to maintain high
|
43 |
+
service standards, and ensuring compliance with safety and security protocols.
|
44 |
+
The Bell Captain also plays a crucial role in training staff and fostering a strong
|
45 |
+
service culture, while being physically capable of handling luggage and working
|
46 |
+
varied shifts.
|
47 |
+
- Junior Security Officer ensures the safety and protection of guests and property
|
48 |
+
by monitoring lobby activities and addressing security concerns. This role involves
|
49 |
+
supervising the security team, allocating duties, and maintaining compliance with
|
50 |
+
safety regulations. The Junior Security Officer also trains new team members on
|
51 |
+
security protocols and is responsible for responding to emergency situations.
|
52 |
+
Candidates must be physically fit to manage security equipment and stand for long
|
53 |
+
periods, working shifts that include weekends and holidays.
|
54 |
+
- source_sentence: The Centre Director/Head of Institute sets the overall learning
|
55 |
+
direction for the organisation, formulates strategic goals and drives organisational
|
56 |
+
growth. He/She assesses the industry landscape to identify new business opportunities
|
57 |
+
and drive the continual development of the organisation's learning products and
|
58 |
+
services portfolio. He champions the organisation's service excellence aspirations
|
59 |
+
and fosters strategic relationships with stakeholders. He is accountable for the
|
60 |
+
success of the organisation and is responsible for driving the organisation's
|
61 |
+
financial, innovation and productivity strategies. He has a strong understanding
|
62 |
+
of market and industry developments, including research developments and technology
|
63 |
+
innovations. He is an inspirational leader with a forward-thinking mindset and
|
64 |
+
a deep passion for learning and development. He establishes and communicates a
|
65 |
+
clear vision, and is highly skilled in influencing and engaging stakeholders to
|
66 |
+
secure their buy-in and support. He has strong business acumen and is able to
|
67 |
+
make calculated-risk decisions, performing effectively in a complex and difficult
|
68 |
+
environment. He frequently works outside of the office, attends industry events
|
69 |
+
and client meetings to develop networks and build strong business relationships.
|
70 |
+
sentences:
|
71 |
+
- Job opening for a General Manager responsible for establishing the vision and
|
72 |
+
strategic direction of a property, leading organizational strategies to enhance
|
73 |
+
business growth and operational excellence, and managing various functional areas
|
74 |
+
to ensure exceptional guest experiences and brand integrity.
|
75 |
+
- Director of Learning and Development responsible for shaping the educational vision,
|
76 |
+
setting strategic objectives, and promoting growth within the organization. Evaluates
|
77 |
+
market trends to uncover new opportunities and enhances the portfolio of learning
|
78 |
+
products and services. Advocates for excellence in service delivery and builds
|
79 |
+
strategic partnerships with key stakeholders. Oversees the organization's success
|
80 |
+
by implementing financial, innovation, and productivity initiatives. Possesses
|
81 |
+
a comprehensive understanding of industry advancements, including research and
|
82 |
+
technological innovations. An inspiring leader with a proactive approach and a
|
83 |
+
genuine enthusiasm for education and development. Clearly articulates a vision
|
84 |
+
and excels in engaging stakeholders to gain their support. Demonstrates strong
|
85 |
+
business insight and makes informed decisions in challenging environments, often
|
86 |
+
networking at industry events and client engagements.
|
87 |
+
- Junior Project Coordinator in a technology firm responsible for supporting project
|
88 |
+
management efforts, assisting in the development of project goals and tracking
|
89 |
+
progress. Evaluates project timelines and identifies potential risks to ensure
|
90 |
+
successful completion of tasks. Supports the team in delivering high-quality service
|
91 |
+
and maintains relationships with clients. Responsible for the day-to-day operations
|
92 |
+
of project management, including budget tracking and resource allocation. Holds
|
93 |
+
a basic understanding of project management methodologies and tools. A collaborative
|
94 |
+
team member with a focus on assisting in the execution of project plans and achieving
|
95 |
+
team objectives. Communicates effectively with team members and stakeholders to
|
96 |
+
ensure alignment and support. Demonstrates foundational business skills and addresses
|
97 |
+
challenges in a structured manner, participating in team meetings and project
|
98 |
+
reviews.
|
99 |
+
- source_sentence: The Chief Engineer /Senior Engineering Manager (Mechanical and
|
100 |
+
Electrical) leads and facilitates the implementation of mechanical and electrical
|
101 |
+
maintenance regime within the organisation. He/She works closely with internal
|
102 |
+
and external stakeholders in implementing new engineering initiatives to enhance
|
103 |
+
the reliability of mechanical and electrical systems. He demonstrates his technical
|
104 |
+
expertise in providing advice to cross-disciplinary engineering studies. His role
|
105 |
+
also includes establishing competency standards and engineering standards to
|
106 |
+
ensure staff are equipped with relevant skills. He has a strong understanding
|
107 |
+
of rail operational activities, industry developments and regulatory requirements
|
108 |
+
of mechanical and electrical systems. He also maintains a forward-thinking mindset
|
109 |
+
to contribute strategically towards achieving the department goals.
|
110 |
+
sentences:
|
111 |
+
- Junior Electrical Technician responsible for performing routine inspections and
|
112 |
+
maintenance on electrical systems within residential buildings. This role requires
|
113 |
+
basic troubleshooting skills and adherence to safety regulations, while collaborating
|
114 |
+
with contractors to ensure compliance with local electrical codes. The technician
|
115 |
+
will also assist in documenting maintenance activities and support senior staff
|
116 |
+
in project implementations.
|
117 |
+
- Restructuring and Insolvency Executive responsible for managing client engagements,
|
118 |
+
overseeing daily operations, and ensuring quality assurance in processes. Expected
|
119 |
+
to develop business relationships, interact with stakeholders, and deliver projects
|
120 |
+
on time while demonstrating strong technical expertise and project management
|
121 |
+
skills in a fast-paced environment.
|
122 |
+
- Senior Engineering Manager for Mechanical and Electrical Systems overseeing maintenance
|
123 |
+
strategies and collaborating with stakeholders to improve system reliability and
|
124 |
+
staff competency in engineering standards.
|
125 |
+
- source_sentence: The Procurement Manager leads the procurement function and is responsible
|
126 |
+
for establishing procurement plans, policies and processes required to obtain
|
127 |
+
the materials, equipment and services to deliver on marine manufacturing projects.
|
128 |
+
He/She leads improvement initiatives to enhance the procurement process and timeliness
|
129 |
+
of providing the necessary resources to complete projects on schedule. He has
|
130 |
+
good communication and negotiation skills for engaging vendors and other external
|
131 |
+
parties, and is able to balance the organisation's needs, as the job encompasses
|
132 |
+
a strategic role in selecting new vendors, fostering relationships and managing
|
133 |
+
vendor performance and contract-related processes by liaising with legal and other
|
134 |
+
key stakeholders.
|
135 |
+
sentences:
|
136 |
+
- Procurement lead responsible for developing plans and processes to acquire materials
|
137 |
+
and services for marine manufacturing projects, while improving procurement efficiency
|
138 |
+
and vendor management through strong communication and negotiation skills.
|
139 |
+
- Junior Supply Chain Coordinator responsible for managing inventory levels and
|
140 |
+
ensuring timely delivery of products within the retail industry. This role involves
|
141 |
+
collaborating with suppliers, tracking shipments, and maintaining accurate records
|
142 |
+
while focusing on cost reduction and efficiency improvements. Strong analytical
|
143 |
+
and problem-solving skills are essential for optimizing supply chain processes.
|
144 |
+
- Director of Aircraft Engine Maintenance is tasked with establishing the long-term
|
145 |
+
strategic vision to enhance business growth in alignment with the organization's
|
146 |
+
mission and values. This role involves advocating for innovative aircraft engine
|
147 |
+
maintenance programs to boost competitiveness and guiding the organization in
|
148 |
+
leading maintenance practices. The director engages with customers, stakeholders,
|
149 |
+
and partners while ensuring compliance with airworthiness regulations, promoting
|
150 |
+
a safe workplace culture, and championing quality and risk management initiatives.
|
151 |
+
The position requires inspiring continuous improvement, driving digital advancements,
|
152 |
+
and assessing strategies for a sustainable business model. Strong leadership and
|
153 |
+
strategic partnership-building skills are essential, along with overseeing financial
|
154 |
+
strategies and talent management.
|
155 |
+
- source_sentence: The Designer is responsible for all phases of design projects and
|
156 |
+
ascertains the feasibility of implementing new products and services for the organisation.
|
157 |
+
He/She executes research to gather data and translates research insights into
|
158 |
+
design outcomes. He uncovers new design ideas, conducts feasibility tests on prototypes
|
159 |
+
and communicates results of design tests and project implementation. The Designer
|
160 |
+
works in a team. He is encouraged to uncover the latest trends in the industry
|
161 |
+
and develop deep business acumen to meet the needs of the organisation. He possesses
|
162 |
+
mastery of design fundamentals and technical skills to execute design concepts
|
163 |
+
required for products and services. He is able to work on multiple projects concurrently,
|
164 |
+
and deliver on expectations within tight deadlines. He may specialise as an Architect,
|
165 |
+
Landscape Architect/Landscape Designer, Interior Designer, Fashion Designer, Product
|
166 |
+
Designer, Furniture Designer, Graphic Designer and/or Interaction Designer, etc.
|
167 |
+
sentences:
|
168 |
+
- Job opening for a Designer responsible for managing all stages of design projects,
|
169 |
+
assessing the feasibility of new products and services. The role involves conducting
|
170 |
+
research to collect data and translating insights into effective design solutions.
|
171 |
+
The Designer will explore innovative design concepts, perform prototype feasibility
|
172 |
+
tests, and share findings on design evaluations and project execution. Collaboration
|
173 |
+
within a team is essential, as is staying updated on industry trends and developing
|
174 |
+
strong business insights to fulfill organizational needs. Proficiency in design
|
175 |
+
principles and technical skills is required to implement design ideas for various
|
176 |
+
products and services, while managing multiple projects under tight deadlines.
|
177 |
+
Specializations may include roles such as Architect, Landscape Designer, Interior
|
178 |
+
Designer, Fashion Designer, Product Designer, Furniture Designer, Graphic Designer,
|
179 |
+
or Interaction Designer.
|
180 |
+
- The Junior Risk Management Analyst is accountable for supporting various stages
|
181 |
+
of risk assessment projects and determining the viability of implementing new
|
182 |
+
compliance measures for the organization. He/She conducts market analysis to gather
|
183 |
+
insights and transforms those insights into actionable risk management strategies.
|
184 |
+
The Analyst identifies potential risk factors, performs assessments on risk mitigation
|
185 |
+
plans, and communicates findings on risk evaluations and project compliance. The
|
186 |
+
Analyst collaborates with cross-functional teams and is expected to stay informed
|
187 |
+
about regulatory changes in the industry and develop a strong understanding of
|
188 |
+
business operations to address organizational risks. A solid grasp of risk management
|
189 |
+
principles and analytical skills is necessary to execute risk assessments effectively,
|
190 |
+
while managing multiple tasks simultaneously and meeting project deadlines.
|
191 |
+
- Job opening for a Senior Manager in youth work management, responsible for overseeing
|
192 |
+
strategic initiatives, resource allocation, collaboration, and governance to enhance
|
193 |
+
operational efficiency. The role involves managing budgets, developing professional
|
194 |
+
development programs, and fostering relationships across various agencies. Ideal
|
195 |
+
candidates should have strong problem-solving skills and experience in institutional
|
196 |
+
settings, communities, and Voluntary Welfare Organizations.
|
197 |
+
pipeline_tag: sentence-similarity
|
198 |
+
library_name: sentence-transformers
|
199 |
+
---
|
200 |
+
|
201 |
+
# SentenceTransformer based on nomic-ai/modernbert-embed-base
|
202 |
+
|
203 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
204 |
+
|
205 |
+
## Model Details
|
206 |
+
|
207 |
+
### Model Description
|
208 |
+
- **Model Type:** Sentence Transformer
|
209 |
+
- **Base model:** [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) <!-- at revision d556a88e332558790b210f7bdbe87da2fa94a8d8 -->
|
210 |
+
- **Maximum Sequence Length:** 8192 tokens
|
211 |
+
- **Output Dimensionality:** 768 dimensions
|
212 |
+
- **Similarity Function:** Cosine Similarity
|
213 |
+
<!-- - **Training Dataset:** Unknown -->
|
214 |
+
<!-- - **Language:** Unknown -->
|
215 |
+
<!-- - **License:** Unknown -->
|
216 |
+
|
217 |
+
### Model Sources
|
218 |
+
|
219 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
220 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
221 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
222 |
+
|
223 |
+
### Full Model Architecture
|
224 |
+
|
225 |
+
```
|
226 |
+
SentenceTransformer(
|
227 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
|
228 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
229 |
+
(2): Normalize()
|
230 |
+
)
|
231 |
+
```
|
232 |
+
|
233 |
+
## Usage
|
234 |
+
|
235 |
+
### Direct Usage (Sentence Transformers)
|
236 |
+
|
237 |
+
First install the Sentence Transformers library:
|
238 |
+
|
239 |
+
```bash
|
240 |
+
pip install -U sentence-transformers
|
241 |
+
```
|
242 |
+
|
243 |
+
Then you can load this model and run inference.
|
244 |
+
```python
|
245 |
+
from sentence_transformers import SentenceTransformer
|
246 |
+
|
247 |
+
# Download from the 🤗 Hub
|
248 |
+
model = SentenceTransformer("dnth/ssf-retriever-modernbert-embed-base")
|
249 |
+
# Run inference
|
250 |
+
sentences = [
|
251 |
+
'The Designer is responsible for all phases of design projects and ascertains the feasibility of implementing new products and services for the organisation. He/She executes research to gather data and translates research insights into design outcomes. He uncovers new design ideas, conducts feasibility tests on prototypes and communicates results of design tests and project implementation. The Designer works in a team. He is encouraged to uncover the latest trends in the industry and develop deep business acumen to meet the needs of the organisation. He possesses mastery of design fundamentals and technical skills to execute design concepts required for products and services. He is able to work on multiple projects concurrently, and deliver on expectations within tight deadlines. He may specialise as an Architect, Landscape Architect/Landscape Designer, Interior Designer, Fashion Designer, Product Designer, Furniture Designer, Graphic Designer and/or Interaction Designer, etc.',
|
252 |
+
'Job opening for a Designer responsible for managing all stages of design projects, assessing the feasibility of new products and services. The role involves conducting research to collect data and translating insights into effective design solutions. The Designer will explore innovative design concepts, perform prototype feasibility tests, and share findings on design evaluations and project execution. Collaboration within a team is essential, as is staying updated on industry trends and developing strong business insights to fulfill organizational needs. Proficiency in design principles and technical skills is required to implement design ideas for various products and services, while managing multiple projects under tight deadlines. Specializations may include roles such as Architect, Landscape Designer, Interior Designer, Fashion Designer, Product Designer, Furniture Designer, Graphic Designer, or Interaction Designer.',
|
253 |
+
'The Junior Risk Management Analyst is accountable for supporting various stages of risk assessment projects and determining the viability of implementing new compliance measures for the organization. He/She conducts market analysis to gather insights and transforms those insights into actionable risk management strategies. The Analyst identifies potential risk factors, performs assessments on risk mitigation plans, and communicates findings on risk evaluations and project compliance. The Analyst collaborates with cross-functional teams and is expected to stay informed about regulatory changes in the industry and develop a strong understanding of business operations to address organizational risks. A solid grasp of risk management principles and analytical skills is necessary to execute risk assessments effectively, while managing multiple tasks simultaneously and meeting project deadlines.',
|
254 |
+
]
|
255 |
+
embeddings = model.encode(sentences)
|
256 |
+
print(embeddings.shape)
|
257 |
+
# (3, 768)
|
258 |
+
|
259 |
+
# Get the similarity scores for the embeddings
|
260 |
+
similarities = model.similarity(embeddings, embeddings)
|
261 |
+
print(similarities)
|
262 |
+
# tensor([[1.0000, 0.8963, 0.2004],
|
263 |
+
# [0.8963, 1.0000, 0.2645],
|
264 |
+
# [0.2004, 0.2645, 1.0000]])
|
265 |
+
```
|
266 |
+
|
267 |
+
<!--
|
268 |
+
### Direct Usage (Transformers)
|
269 |
+
|
270 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
271 |
+
|
272 |
+
</details>
|
273 |
+
-->
|
274 |
+
|
275 |
+
<!--
|
276 |
+
### Downstream Usage (Sentence Transformers)
|
277 |
+
|
278 |
+
You can finetune this model on your own dataset.
|
279 |
+
|
280 |
+
<details><summary>Click to expand</summary>
|
281 |
+
|
282 |
+
</details>
|
283 |
+
-->
|
284 |
+
|
285 |
+
<!--
|
286 |
+
### Out-of-Scope Use
|
287 |
+
|
288 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
289 |
+
-->
|
290 |
+
|
291 |
+
<!--
|
292 |
+
## Bias, Risks and Limitations
|
293 |
+
|
294 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
295 |
+
-->
|
296 |
+
|
297 |
+
<!--
|
298 |
+
### Recommendations
|
299 |
+
|
300 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
301 |
+
-->
|
302 |
+
|
303 |
+
## Training Details
|
304 |
+
|
305 |
+
### Training Dataset
|
306 |
+
|
307 |
+
#### Unnamed Dataset
|
308 |
+
|
309 |
+
* Size: 4,524 training samples
|
310 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
311 |
+
* Approximate statistics based on the first 1000 samples:
|
312 |
+
| | anchor | positive | negative |
|
313 |
+
|:--------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
314 |
+
| type | string | string | string |
|
315 |
+
| details | <ul><li>min: 57 tokens</li><li>mean: 168.21 tokens</li><li>max: 380 tokens</li></ul> | <ul><li>min: 22 tokens</li><li>mean: 66.42 tokens</li><li>max: 204 tokens</li></ul> | <ul><li>min: 33 tokens</li><li>mean: 82.38 tokens</li><li>max: 247 tokens</li></ul> |
|
316 |
+
* Samples:
|
317 |
+
| anchor | positive | negative |
|
318 |
+
|:-------|:---------|:---------|
|
319 |
+
| <code>The Material Handling Equipment (MHE) Operator/Forklift Operator/Warehouse Assistant/Inventory Assistant is responsible for sorting, routing and loading cargo to and from various warehousing or storage locations. Systematic and mechanically-inclined, he/she is also responsible for upholding quality standards, ensuring the safe and efficient operation of material-handling equipment and may also be required to support general warehouse operations. He is expected work with internal and external stakeholders to accomplish his work.</code> | <code>Looking for a Warehouse Operator or Forklift Operator to manage cargo sorting, routing, and loading at various storage facilities. The ideal candidate should be mechanically inclined, maintain quality standards, ensure safe operation of material handling equipment, and assist with overall warehouse tasks while collaborating with stakeholders.</code> | <code>Seeking a Junior Risk Management Analyst to assess and evaluate potential risks within the financial services sector. The candidate will be responsible for analyzing data, developing risk mitigation strategies, and ensuring compliance with industry regulations. Strong analytical skills and attention to detail are essential, as well as the ability to work with both internal teams and external clients.</code> |
|
320 |
+
| <code>The Social Service Assistant supports interventions for clients. He/She accompanies social service professionals on client visits and accompanies clients to and from their appointments activities. He provides administrative and logistical support for the implementation of programmes and supports the organisation of core programmes and services. He provides support for the implementation of the organisation's outreach plans and identifies potential platforms for outreach that are used by the target client group. He also supports the implementation of relevant frameworks, protocols and procedures in his own area of work. A proactive, resourceful and client-centric individual, the Social Service Assistant works in various voluntary welfare organisations, communities and institutional settings.</code> | <code>Social Service Assistant role involving client support and administrative assistance for social service programs, including accompanying professionals and clients to appointments, and aiding in outreach initiatives.</code> | <code>Junior Financial Analyst responsible for conducting market research and analyzing financial statements. The role involves preparing reports, assisting in budgeting processes, and supporting the finance team in various projects. The candidate should be detail-oriented, with strong analytical skills and proficiency in financial software, working primarily in the corporate finance sector.</code> |
|
321 |
+
| <code>The Marketing Communications Manager/Digital Marketing Manager/Public Relations Manager/Assistant Marketing Communications Manager/Assistant Digital Marketing Manager/Assistant Public Relations Manager manages the execution of the organisation's branding and marketing efforts across traditional and new media platforms to create positive customer engagement. He/She enhances the property's brand reputation and brand image consistency across all marketing channels and liaises with key stakeholders to develop marketing strategies and plans, leveraging on market research and analyses to identify marketing opportunities, target markets and marketing channels. He plans marketing campaigns and manages the execution of marketing and media-related activities and events. His responsibilities include developing campaign design concepts and ideas, marketing collaterals as well as measures to engage customers. He manages public communications as well, monitoring and responding to online comments, de...</code> | <code>Digital Marketing Manager overseeing branding and marketing initiatives across various media platforms to foster positive customer interaction. Responsible for enhancing brand reputation and ensuring consistent brand image across marketing channels while collaborating with stakeholders to formulate effective marketing strategies. Plans and executes marketing campaigns, manages media-related events, and develops engaging marketing materials. Also handles public communications, monitors online feedback, creates press releases, and responds to media inquiries. At the management level, guides team performance, manages budget forecasts, and evaluates new technologies to enhance customer engagement. Utilizes data analytics to identify market trends and develop successful marketing and digital initiatives in a dynamic environment.</code> | <code>Junior Public Relations Coordinator responsible for managing internal communications and employee engagement initiatives within the organization. Tasks include creating newsletters, organizing team-building events, and maintaining the company intranet. The role focuses on enhancing employee morale and promoting a positive workplace culture through various communication channels. Additionally, the coordinator assists in drafting internal memos and managing responses to employee feedback. This position involves collaborating with HR to develop training materials and support staff development programs. The coordinator also analyzes employee satisfaction surveys to identify areas for improvement and implement strategies to foster a supportive work environment.</code> |
|
322 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
323 |
+
```json
|
324 |
+
{
|
325 |
+
"scale": 20.0,
|
326 |
+
"similarity_fct": "cos_sim"
|
327 |
+
}
|
328 |
+
```
|
329 |
+
|
330 |
+
### Evaluation Dataset
|
331 |
+
|
332 |
+
#### Unnamed Dataset
|
333 |
+
|
334 |
+
* Size: 1,131 evaluation samples
|
335 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
336 |
+
* Approximate statistics based on the first 1000 samples:
|
337 |
+
| | anchor | positive | negative |
|
338 |
+
|:--------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
339 |
+
| type | string | string | string |
|
340 |
+
| details | <ul><li>min: 57 tokens</li><li>mean: 167.67 tokens</li><li>max: 349 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 67.35 tokens</li><li>max: 204 tokens</li></ul> | <ul><li>min: 33 tokens</li><li>mean: 82.88 tokens</li><li>max: 198 tokens</li></ul> |
|
341 |
+
* Samples:
|
342 |
+
| anchor | positive | negative |
|
343 |
+
|:-------|:---------|:---------|
|
344 |
+
| <code>The HSE Specialist implements and administers the organisations Workplace Safety and Health (WSH) and Environmental Management System (EMS) programmes in the workplace and supports the development of WSH and EMS frameworks, in compliance with pertinent government regulations and organisational health, safety and environmental guidelines. The HSE Specialist supports the development and implementation of Safe System of Work (SSoW) processes and procedures at the workplace, and interfaces with the production and process engineering department, engineering and maintenance department, and contractors to ensure compliance. He/She assists in the development of emergency response and crisis management plans and works to identify and reduce known risks that could lead to emergencies or crises. He also supports the implementation of continuous improvement initiatives and activities at the workplace. The HSE Specialist is a good team player, has good written and verbal communication skills, inter...</code> | <code>Job opening for an HSE Specialist to manage and implement Workplace Safety and Health (WSH) and Environmental Management System (EMS) initiatives, ensuring compliance with government regulations and internal guidelines. Responsibilities include developing Safe System of Work (SSoW) procedures, collaborating with engineering and maintenance teams, and assisting in emergency response planning. The ideal candidate will have strong communication skills and a passion for training and continuous improvement.</code> | <code>Seeking a Junior Risk Management Analyst to oversee the implementation and administration of Workplace Safety and Health (WSH) protocols and Environmental Management System (EMS) standards in a healthcare setting. This role requires the development of risk assessment frameworks and the execution of compliance audits with relevant health regulations. The Junior Analyst will work closely with clinical teams and administrative departments to ensure adherence to safety guidelines and will assist in creating training materials for staff. Strong analytical skills and effective communication are essential for this position.</code> |
|
345 |
+
| <code>The Membership Crew assists in the execution of plans covering membership acquisition and member retention. He/She is involved in the sale of memberships and monitors the membership statistics in accordance with the parameters set by management, monitors the budgeting and forecasting of membership rates and conducts training for operations staff on membership-related issues. Resourceful and service-oriented, he leverages his strong interpersonal and communication skills to answer queries from current and potential members regarding membership-related procedures and policies. He is also able to assist with any customer queries directed to him by his team members. Able to work on a shift system, he is comfortable with working in an outdoor environment, be on his feet for long hours and maintains a flexible work-week including weekends, evenings and public holidays.</code> | <code>Membership Coordinator responsible for driving membership growth and retention strategies, managing sales of memberships, analyzing membership data according to management guidelines, overseeing budget forecasts, and providing training to staff on membership processes. Strong interpersonal and communication skills are essential for addressing inquiries from current and prospective members, as well as assisting team members with customer service issues. Must be adaptable to a shift schedule and comfortable working outdoors, standing for extended periods, and available during weekends, evenings, and public holidays.</code> | <code>Junior Financial Analyst tasked with supporting the financial planning and analysis team in the healthcare sector. Responsibilities include assisting in budget preparation, forecasting financial performance, and analyzing variance reports. The role requires strong analytical skills to interpret financial data and produce reports for management review. The candidate should be proficient in Microsoft Excel and financial modeling, while also possessing excellent communication skills to collaborate with various departments. Flexibility in work hours is necessary, and the position may involve occasional weekend work for reporting deadlines.</code> |
|
346 |
+
| <code>The Senior Infant Educator plays an active role as a mentor to the Infant Educator team. He/She takes responsibility for coaching and leading the infant care team in the Centre. He plays an important role in the design and implementation of developmentally appropriate curricula and programmes for the day-to-day developmental and caregiving tasks for infants. He also leads the building of relationships and partnerships with stakeholders. He designs and implements family and community programmes, and contributes to the Centres culture of continuous learning, collaboration and collegiality, in line with its vision, mission and goals.</code> | <code>Senior Infant Care Supervisor responsible for mentoring the infant care team, leading curriculum development, and fostering relationships with families and stakeholders while promoting a culture of collaboration and continuous learning in the childcare center.</code> | <code>Junior Risk Management Analyst tasked with assessing and mitigating potential risks within the financial sector. This role involves analyzing data, preparing reports, and collaborating with various departments to ensure compliance and safety standards are met, while contributing to the overall risk management strategy of the organization.</code> |
|
347 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
348 |
+
```json
|
349 |
+
{
|
350 |
+
"scale": 20.0,
|
351 |
+
"similarity_fct": "cos_sim"
|
352 |
+
}
|
353 |
+
```
|
354 |
+
|
355 |
+
### Training Hyperparameters
|
356 |
+
#### Non-Default Hyperparameters
|
357 |
+
|
358 |
+
- `eval_strategy`: steps
|
359 |
+
- `per_device_train_batch_size`: 16
|
360 |
+
- `gradient_accumulation_steps`: 2
|
361 |
+
- `weight_decay`: 0.1
|
362 |
+
- `num_train_epochs`: 5
|
363 |
+
- `lr_scheduler_type`: cosine
|
364 |
+
- `warmup_ratio`: 0.1
|
365 |
+
- `fp16`: True
|
366 |
+
- `load_best_model_at_end`: True
|
367 |
+
- `batch_sampler`: no_duplicates
|
368 |
+
|
369 |
+
#### All Hyperparameters
|
370 |
+
<details><summary>Click to expand</summary>
|
371 |
+
|
372 |
+
- `overwrite_output_dir`: False
|
373 |
+
- `do_predict`: False
|
374 |
+
- `eval_strategy`: steps
|
375 |
+
- `prediction_loss_only`: True
|
376 |
+
- `per_device_train_batch_size`: 16
|
377 |
+
- `per_device_eval_batch_size`: 8
|
378 |
+
- `per_gpu_train_batch_size`: None
|
379 |
+
- `per_gpu_eval_batch_size`: None
|
380 |
+
- `gradient_accumulation_steps`: 2
|
381 |
+
- `eval_accumulation_steps`: None
|
382 |
+
- `torch_empty_cache_steps`: None
|
383 |
+
- `learning_rate`: 5e-05
|
384 |
+
- `weight_decay`: 0.1
|
385 |
+
- `adam_beta1`: 0.9
|
386 |
+
- `adam_beta2`: 0.999
|
387 |
+
- `adam_epsilon`: 1e-08
|
388 |
+
- `max_grad_norm`: 1.0
|
389 |
+
- `num_train_epochs`: 5
|
390 |
+
- `max_steps`: -1
|
391 |
+
- `lr_scheduler_type`: cosine
|
392 |
+
- `lr_scheduler_kwargs`: {}
|
393 |
+
- `warmup_ratio`: 0.1
|
394 |
+
- `warmup_steps`: 0
|
395 |
+
- `log_level`: passive
|
396 |
+
- `log_level_replica`: warning
|
397 |
+
- `log_on_each_node`: True
|
398 |
+
- `logging_nan_inf_filter`: True
|
399 |
+
- `save_safetensors`: True
|
400 |
+
- `save_on_each_node`: False
|
401 |
+
- `save_only_model`: False
|
402 |
+
- `restore_callback_states_from_checkpoint`: False
|
403 |
+
- `no_cuda`: False
|
404 |
+
- `use_cpu`: False
|
405 |
+
- `use_mps_device`: False
|
406 |
+
- `seed`: 42
|
407 |
+
- `data_seed`: None
|
408 |
+
- `jit_mode_eval`: False
|
409 |
+
- `use_ipex`: False
|
410 |
+
- `bf16`: False
|
411 |
+
- `fp16`: True
|
412 |
+
- `fp16_opt_level`: O1
|
413 |
+
- `half_precision_backend`: auto
|
414 |
+
- `bf16_full_eval`: False
|
415 |
+
- `fp16_full_eval`: False
|
416 |
+
- `tf32`: None
|
417 |
+
- `local_rank`: 0
|
418 |
+
- `ddp_backend`: None
|
419 |
+
- `tpu_num_cores`: None
|
420 |
+
- `tpu_metrics_debug`: False
|
421 |
+
- `debug`: []
|
422 |
+
- `dataloader_drop_last`: False
|
423 |
+
- `dataloader_num_workers`: 0
|
424 |
+
- `dataloader_prefetch_factor`: None
|
425 |
+
- `past_index`: -1
|
426 |
+
- `disable_tqdm`: False
|
427 |
+
- `remove_unused_columns`: True
|
428 |
+
- `label_names`: None
|
429 |
+
- `load_best_model_at_end`: True
|
430 |
+
- `ignore_data_skip`: False
|
431 |
+
- `fsdp`: []
|
432 |
+
- `fsdp_min_num_params`: 0
|
433 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
434 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
435 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
436 |
+
- `deepspeed`: None
|
437 |
+
- `label_smoothing_factor`: 0.0
|
438 |
+
- `optim`: adamw_torch
|
439 |
+
- `optim_args`: None
|
440 |
+
- `adafactor`: False
|
441 |
+
- `group_by_length`: False
|
442 |
+
- `length_column_name`: length
|
443 |
+
- `ddp_find_unused_parameters`: None
|
444 |
+
- `ddp_bucket_cap_mb`: None
|
445 |
+
- `ddp_broadcast_buffers`: False
|
446 |
+
- `dataloader_pin_memory`: True
|
447 |
+
- `dataloader_persistent_workers`: False
|
448 |
+
- `skip_memory_metrics`: True
|
449 |
+
- `use_legacy_prediction_loop`: False
|
450 |
+
- `push_to_hub`: False
|
451 |
+
- `resume_from_checkpoint`: None
|
452 |
+
- `hub_model_id`: None
|
453 |
+
- `hub_strategy`: every_save
|
454 |
+
- `hub_private_repo`: None
|
455 |
+
- `hub_always_push`: False
|
456 |
+
- `hub_revision`: None
|
457 |
+
- `gradient_checkpointing`: False
|
458 |
+
- `gradient_checkpointing_kwargs`: None
|
459 |
+
- `include_inputs_for_metrics`: False
|
460 |
+
- `include_for_metrics`: []
|
461 |
+
- `eval_do_concat_batches`: True
|
462 |
+
- `fp16_backend`: auto
|
463 |
+
- `push_to_hub_model_id`: None
|
464 |
+
- `push_to_hub_organization`: None
|
465 |
+
- `mp_parameters`:
|
466 |
+
- `auto_find_batch_size`: False
|
467 |
+
- `full_determinism`: False
|
468 |
+
- `torchdynamo`: None
|
469 |
+
- `ray_scope`: last
|
470 |
+
- `ddp_timeout`: 1800
|
471 |
+
- `torch_compile`: False
|
472 |
+
- `torch_compile_backend`: None
|
473 |
+
- `torch_compile_mode`: None
|
474 |
+
- `include_tokens_per_second`: False
|
475 |
+
- `include_num_input_tokens_seen`: False
|
476 |
+
- `neftune_noise_alpha`: None
|
477 |
+
- `optim_target_modules`: None
|
478 |
+
- `batch_eval_metrics`: False
|
479 |
+
- `eval_on_start`: False
|
480 |
+
- `use_liger_kernel`: False
|
481 |
+
- `liger_kernel_config`: None
|
482 |
+
- `eval_use_gather_object`: False
|
483 |
+
- `average_tokens_across_devices`: False
|
484 |
+
- `prompts`: None
|
485 |
+
- `batch_sampler`: no_duplicates
|
486 |
+
- `multi_dataset_batch_sampler`: proportional
|
487 |
+
- `router_mapping`: {}
|
488 |
+
- `learning_rate_mapping`: {}
|
489 |
+
|
490 |
+
</details>
|
491 |
+
|
492 |
+
### Training Logs
|
493 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
494 |
+
|:---------:|:-------:|:-------------:|:---------------:|
|
495 |
+
| 0.3534 | 50 | 0.049 | 0.0051 |
|
496 |
+
| 0.7067 | 100 | 0.0079 | 0.0038 |
|
497 |
+
| 1.0565 | 150 | 0.004 | 0.0032 |
|
498 |
+
| 1.4099 | 200 | 0.0048 | 0.0029 |
|
499 |
+
| 1.7633 | 250 | 0.0025 | 0.0023 |
|
500 |
+
| 2.1131 | 300 | 0.0024 | 0.0022 |
|
501 |
+
| 2.4664 | 350 | 0.004 | 0.0026 |
|
502 |
+
| 2.8198 | 400 | 0.0056 | 0.0023 |
|
503 |
+
| 3.1696 | 450 | 0.0013 | 0.0017 |
|
504 |
+
| **3.523** | **500** | **0.0014** | **0.0016** |
|
505 |
+
| 3.8763 | 550 | 0.0016 | 0.0019 |
|
506 |
+
| 4.2261 | 600 | 0.0013 | 0.0018 |
|
507 |
+
| 4.5795 | 650 | 0.0032 | 0.0017 |
|
508 |
+
| 4.9329 | 700 | 0.0011 | 0.0017 |
|
509 |
+
|
510 |
+
* The bold row denotes the saved checkpoint.
|
511 |
+
|
512 |
+
### Framework Versions
|
513 |
+
- Python: 3.12.8
|
514 |
+
- Sentence Transformers: 5.0.0
|
515 |
+
- Transformers: 4.54.1
|
516 |
+
- PyTorch: 2.7.1+cu126
|
517 |
+
- Accelerate: 1.9.0
|
518 |
+
- Datasets: 4.0.0
|
519 |
+
- Tokenizers: 0.21.4
|
520 |
+
|
521 |
+
## Citation
|
522 |
+
|
523 |
+
### BibTeX
|
524 |
+
|
525 |
+
#### Sentence Transformers
|
526 |
+
```bibtex
|
527 |
+
@inproceedings{reimers-2019-sentence-bert,
|
528 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
529 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
530 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
531 |
+
month = "11",
|
532 |
+
year = "2019",
|
533 |
+
publisher = "Association for Computational Linguistics",
|
534 |
+
url = "https://arxiv.org/abs/1908.10084",
|
535 |
+
}
|
536 |
+
```
|
537 |
+
|
538 |
+
#### MultipleNegativesRankingLoss
|
539 |
+
```bibtex
|
540 |
+
@misc{henderson2017efficient,
|
541 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
542 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
543 |
+
year={2017},
|
544 |
+
eprint={1705.00652},
|
545 |
+
archivePrefix={arXiv},
|
546 |
+
primaryClass={cs.CL}
|
547 |
+
}
|
548 |
+
```
|
549 |
+
|
550 |
+
<!--
|
551 |
+
## Glossary
|
552 |
+
|
553 |
+
*Clearly define terms in order to be accessible across audiences.*
|
554 |
+
-->
|
555 |
+
|
556 |
+
<!--
|
557 |
+
## Model Card Authors
|
558 |
+
|
559 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
560 |
+
-->
|
561 |
+
|
562 |
+
<!--
|
563 |
+
## Model Card Contact
|
564 |
+
|
565 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
566 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"ModernBertModel"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 50281,
|
8 |
+
"classifier_activation": "gelu",
|
9 |
+
"classifier_bias": false,
|
10 |
+
"classifier_dropout": 0.0,
|
11 |
+
"classifier_pooling": "mean",
|
12 |
+
"cls_token_id": 50281,
|
13 |
+
"decoder_bias": true,
|
14 |
+
"deterministic_flash_attn": false,
|
15 |
+
"embedding_dropout": 0.0,
|
16 |
+
"eos_token_id": 50282,
|
17 |
+
"global_attn_every_n_layers": 3,
|
18 |
+
"global_rope_theta": 160000.0,
|
19 |
+
"gradient_checkpointing": false,
|
20 |
+
"hidden_activation": "gelu",
|
21 |
+
"hidden_size": 768,
|
22 |
+
"initializer_cutoff_factor": 2.0,
|
23 |
+
"initializer_range": 0.02,
|
24 |
+
"intermediate_size": 1152,
|
25 |
+
"layer_norm_eps": 1e-05,
|
26 |
+
"local_attention": 128,
|
27 |
+
"local_rope_theta": 10000.0,
|
28 |
+
"max_position_embeddings": 8192,
|
29 |
+
"mlp_bias": false,
|
30 |
+
"mlp_dropout": 0.0,
|
31 |
+
"model_type": "modernbert",
|
32 |
+
"norm_bias": false,
|
33 |
+
"norm_eps": 1e-05,
|
34 |
+
"num_attention_heads": 12,
|
35 |
+
"num_hidden_layers": 22,
|
36 |
+
"pad_token_id": 50283,
|
37 |
+
"position_embedding_type": "absolute",
|
38 |
+
"repad_logits_with_grad": false,
|
39 |
+
"sep_token_id": 50282,
|
40 |
+
"sparse_pred_ignore_index": -100,
|
41 |
+
"sparse_prediction": false,
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.54.1",
|
44 |
+
"vocab_size": 50368
|
45 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "5.0.0",
|
4 |
+
"transformers": "4.54.1",
|
5 |
+
"pytorch": "2.7.1+cu126"
|
6 |
+
},
|
7 |
+
"prompts": {
|
8 |
+
"query": "",
|
9 |
+
"document": ""
|
10 |
+
},
|
11 |
+
"default_prompt_name": null,
|
12 |
+
"similarity_fn_name": "cosine",
|
13 |
+
"model_type": "SentenceTransformer"
|
14 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ee08ea76e21cc14757f6c87ec704c5a7ba89cab17e01402aa25042694c4a992
|
3 |
+
size 596070136
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 8192,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": true,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,945 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "|||IP_ADDRESS|||",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": true,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": false
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<|padding|>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"50254": {
|
20 |
+
"content": " ",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": true,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": false
|
26 |
+
},
|
27 |
+
"50255": {
|
28 |
+
"content": " ",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": false
|
34 |
+
},
|
35 |
+
"50256": {
|
36 |
+
"content": " ",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": true,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": false
|
42 |
+
},
|
43 |
+
"50257": {
|
44 |
+
"content": " ",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": true,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
},
|
51 |
+
"50258": {
|
52 |
+
"content": " ",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": true,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": false
|
58 |
+
},
|
59 |
+
"50259": {
|
60 |
+
"content": " ",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": true,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": false
|
66 |
+
},
|
67 |
+
"50260": {
|
68 |
+
"content": " ",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": true,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": false
|
74 |
+
},
|
75 |
+
"50261": {
|
76 |
+
"content": " ",
|
77 |
+
"lstrip": false,
|
78 |
+
"normalized": true,
|
79 |
+
"rstrip": false,
|
80 |
+
"single_word": false,
|
81 |
+
"special": false
|
82 |
+
},
|
83 |
+
"50262": {
|
84 |
+
"content": " ",
|
85 |
+
"lstrip": false,
|
86 |
+
"normalized": true,
|
87 |
+
"rstrip": false,
|
88 |
+
"single_word": false,
|
89 |
+
"special": false
|
90 |
+
},
|
91 |
+
"50263": {
|
92 |
+
"content": " ",
|
93 |
+
"lstrip": false,
|
94 |
+
"normalized": true,
|
95 |
+
"rstrip": false,
|
96 |
+
"single_word": false,
|
97 |
+
"special": false
|
98 |
+
},
|
99 |
+
"50264": {
|
100 |
+
"content": " ",
|
101 |
+
"lstrip": false,
|
102 |
+
"normalized": true,
|
103 |
+
"rstrip": false,
|
104 |
+
"single_word": false,
|
105 |
+
"special": false
|
106 |
+
},
|
107 |
+
"50265": {
|
108 |
+
"content": " ",
|
109 |
+
"lstrip": false,
|
110 |
+
"normalized": true,
|
111 |
+
"rstrip": false,
|
112 |
+
"single_word": false,
|
113 |
+
"special": false
|
114 |
+
},
|
115 |
+
"50266": {
|
116 |
+
"content": " ",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": true,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false,
|
121 |
+
"special": false
|
122 |
+
},
|
123 |
+
"50267": {
|
124 |
+
"content": " ",
|
125 |
+
"lstrip": false,
|
126 |
+
"normalized": true,
|
127 |
+
"rstrip": false,
|
128 |
+
"single_word": false,
|
129 |
+
"special": false
|
130 |
+
},
|
131 |
+
"50268": {
|
132 |
+
"content": " ",
|
133 |
+
"lstrip": false,
|
134 |
+
"normalized": true,
|
135 |
+
"rstrip": false,
|
136 |
+
"single_word": false,
|
137 |
+
"special": false
|
138 |
+
},
|
139 |
+
"50269": {
|
140 |
+
"content": " ",
|
141 |
+
"lstrip": false,
|
142 |
+
"normalized": true,
|
143 |
+
"rstrip": false,
|
144 |
+
"single_word": false,
|
145 |
+
"special": false
|
146 |
+
},
|
147 |
+
"50270": {
|
148 |
+
"content": " ",
|
149 |
+
"lstrip": false,
|
150 |
+
"normalized": true,
|
151 |
+
"rstrip": false,
|
152 |
+
"single_word": false,
|
153 |
+
"special": false
|
154 |
+
},
|
155 |
+
"50271": {
|
156 |
+
"content": " ",
|
157 |
+
"lstrip": false,
|
158 |
+
"normalized": true,
|
159 |
+
"rstrip": false,
|
160 |
+
"single_word": false,
|
161 |
+
"special": false
|
162 |
+
},
|
163 |
+
"50272": {
|
164 |
+
"content": " ",
|
165 |
+
"lstrip": false,
|
166 |
+
"normalized": true,
|
167 |
+
"rstrip": false,
|
168 |
+
"single_word": false,
|
169 |
+
"special": false
|
170 |
+
},
|
171 |
+
"50273": {
|
172 |
+
"content": " ",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": true,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false,
|
177 |
+
"special": false
|
178 |
+
},
|
179 |
+
"50274": {
|
180 |
+
"content": " ",
|
181 |
+
"lstrip": false,
|
182 |
+
"normalized": true,
|
183 |
+
"rstrip": false,
|
184 |
+
"single_word": false,
|
185 |
+
"special": false
|
186 |
+
},
|
187 |
+
"50275": {
|
188 |
+
"content": " ",
|
189 |
+
"lstrip": false,
|
190 |
+
"normalized": true,
|
191 |
+
"rstrip": false,
|
192 |
+
"single_word": false,
|
193 |
+
"special": false
|
194 |
+
},
|
195 |
+
"50276": {
|
196 |
+
"content": " ",
|
197 |
+
"lstrip": false,
|
198 |
+
"normalized": true,
|
199 |
+
"rstrip": false,
|
200 |
+
"single_word": false,
|
201 |
+
"special": false
|
202 |
+
},
|
203 |
+
"50277": {
|
204 |
+
"content": "|||EMAIL_ADDRESS|||",
|
205 |
+
"lstrip": false,
|
206 |
+
"normalized": true,
|
207 |
+
"rstrip": false,
|
208 |
+
"single_word": false,
|
209 |
+
"special": false
|
210 |
+
},
|
211 |
+
"50278": {
|
212 |
+
"content": "|||PHONE_NUMBER|||",
|
213 |
+
"lstrip": false,
|
214 |
+
"normalized": true,
|
215 |
+
"rstrip": false,
|
216 |
+
"single_word": false,
|
217 |
+
"special": false
|
218 |
+
},
|
219 |
+
"50279": {
|
220 |
+
"content": "<|endoftext|>",
|
221 |
+
"lstrip": false,
|
222 |
+
"normalized": false,
|
223 |
+
"rstrip": false,
|
224 |
+
"single_word": false,
|
225 |
+
"special": true
|
226 |
+
},
|
227 |
+
"50280": {
|
228 |
+
"content": "[UNK]",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false,
|
233 |
+
"special": true
|
234 |
+
},
|
235 |
+
"50281": {
|
236 |
+
"content": "[CLS]",
|
237 |
+
"lstrip": false,
|
238 |
+
"normalized": false,
|
239 |
+
"rstrip": false,
|
240 |
+
"single_word": false,
|
241 |
+
"special": true
|
242 |
+
},
|
243 |
+
"50282": {
|
244 |
+
"content": "[SEP]",
|
245 |
+
"lstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"rstrip": false,
|
248 |
+
"single_word": false,
|
249 |
+
"special": true
|
250 |
+
},
|
251 |
+
"50283": {
|
252 |
+
"content": "[PAD]",
|
253 |
+
"lstrip": false,
|
254 |
+
"normalized": false,
|
255 |
+
"rstrip": false,
|
256 |
+
"single_word": false,
|
257 |
+
"special": true
|
258 |
+
},
|
259 |
+
"50284": {
|
260 |
+
"content": "[MASK]",
|
261 |
+
"lstrip": true,
|
262 |
+
"normalized": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"single_word": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
"50285": {
|
268 |
+
"content": "[unused0]",
|
269 |
+
"lstrip": false,
|
270 |
+
"normalized": true,
|
271 |
+
"rstrip": false,
|
272 |
+
"single_word": false,
|
273 |
+
"special": false
|
274 |
+
},
|
275 |
+
"50286": {
|
276 |
+
"content": "[unused1]",
|
277 |
+
"lstrip": false,
|
278 |
+
"normalized": true,
|
279 |
+
"rstrip": false,
|
280 |
+
"single_word": false,
|
281 |
+
"special": false
|
282 |
+
},
|
283 |
+
"50287": {
|
284 |
+
"content": "[unused2]",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": true,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false,
|
289 |
+
"special": false
|
290 |
+
},
|
291 |
+
"50288": {
|
292 |
+
"content": "[unused3]",
|
293 |
+
"lstrip": false,
|
294 |
+
"normalized": true,
|
295 |
+
"rstrip": false,
|
296 |
+
"single_word": false,
|
297 |
+
"special": false
|
298 |
+
},
|
299 |
+
"50289": {
|
300 |
+
"content": "[unused4]",
|
301 |
+
"lstrip": false,
|
302 |
+
"normalized": true,
|
303 |
+
"rstrip": false,
|
304 |
+
"single_word": false,
|
305 |
+
"special": false
|
306 |
+
},
|
307 |
+
"50290": {
|
308 |
+
"content": "[unused5]",
|
309 |
+
"lstrip": false,
|
310 |
+
"normalized": true,
|
311 |
+
"rstrip": false,
|
312 |
+
"single_word": false,
|
313 |
+
"special": false
|
314 |
+
},
|
315 |
+
"50291": {
|
316 |
+
"content": "[unused6]",
|
317 |
+
"lstrip": false,
|
318 |
+
"normalized": true,
|
319 |
+
"rstrip": false,
|
320 |
+
"single_word": false,
|
321 |
+
"special": false
|
322 |
+
},
|
323 |
+
"50292": {
|
324 |
+
"content": "[unused7]",
|
325 |
+
"lstrip": false,
|
326 |
+
"normalized": true,
|
327 |
+
"rstrip": false,
|
328 |
+
"single_word": false,
|
329 |
+
"special": false
|
330 |
+
},
|
331 |
+
"50293": {
|
332 |
+
"content": "[unused8]",
|
333 |
+
"lstrip": false,
|
334 |
+
"normalized": true,
|
335 |
+
"rstrip": false,
|
336 |
+
"single_word": false,
|
337 |
+
"special": false
|
338 |
+
},
|
339 |
+
"50294": {
|
340 |
+
"content": "[unused9]",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": true,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false,
|
345 |
+
"special": false
|
346 |
+
},
|
347 |
+
"50295": {
|
348 |
+
"content": "[unused10]",
|
349 |
+
"lstrip": false,
|
350 |
+
"normalized": true,
|
351 |
+
"rstrip": false,
|
352 |
+
"single_word": false,
|
353 |
+
"special": false
|
354 |
+
},
|
355 |
+
"50296": {
|
356 |
+
"content": "[unused11]",
|
357 |
+
"lstrip": false,
|
358 |
+
"normalized": true,
|
359 |
+
"rstrip": false,
|
360 |
+
"single_word": false,
|
361 |
+
"special": false
|
362 |
+
},
|
363 |
+
"50297": {
|
364 |
+
"content": "[unused12]",
|
365 |
+
"lstrip": false,
|
366 |
+
"normalized": true,
|
367 |
+
"rstrip": false,
|
368 |
+
"single_word": false,
|
369 |
+
"special": false
|
370 |
+
},
|
371 |
+
"50298": {
|
372 |
+
"content": "[unused13]",
|
373 |
+
"lstrip": false,
|
374 |
+
"normalized": true,
|
375 |
+
"rstrip": false,
|
376 |
+
"single_word": false,
|
377 |
+
"special": false
|
378 |
+
},
|
379 |
+
"50299": {
|
380 |
+
"content": "[unused14]",
|
381 |
+
"lstrip": false,
|
382 |
+
"normalized": true,
|
383 |
+
"rstrip": false,
|
384 |
+
"single_word": false,
|
385 |
+
"special": false
|
386 |
+
},
|
387 |
+
"50300": {
|
388 |
+
"content": "[unused15]",
|
389 |
+
"lstrip": false,
|
390 |
+
"normalized": true,
|
391 |
+
"rstrip": false,
|
392 |
+
"single_word": false,
|
393 |
+
"special": false
|
394 |
+
},
|
395 |
+
"50301": {
|
396 |
+
"content": "[unused16]",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": true,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false,
|
401 |
+
"special": false
|
402 |
+
},
|
403 |
+
"50302": {
|
404 |
+
"content": "[unused17]",
|
405 |
+
"lstrip": false,
|
406 |
+
"normalized": true,
|
407 |
+
"rstrip": false,
|
408 |
+
"single_word": false,
|
409 |
+
"special": false
|
410 |
+
},
|
411 |
+
"50303": {
|
412 |
+
"content": "[unused18]",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": true,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": false
|
418 |
+
},
|
419 |
+
"50304": {
|
420 |
+
"content": "[unused19]",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": true,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": false
|
426 |
+
},
|
427 |
+
"50305": {
|
428 |
+
"content": "[unused20]",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": true,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": false
|
434 |
+
},
|
435 |
+
"50306": {
|
436 |
+
"content": "[unused21]",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": true,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": false
|
442 |
+
},
|
443 |
+
"50307": {
|
444 |
+
"content": "[unused22]",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": true,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": false
|
450 |
+
},
|
451 |
+
"50308": {
|
452 |
+
"content": "[unused23]",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": true,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": false
|
458 |
+
},
|
459 |
+
"50309": {
|
460 |
+
"content": "[unused24]",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": true,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": false
|
466 |
+
},
|
467 |
+
"50310": {
|
468 |
+
"content": "[unused25]",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": true,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": false
|
474 |
+
},
|
475 |
+
"50311": {
|
476 |
+
"content": "[unused26]",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": true,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": false
|
482 |
+
},
|
483 |
+
"50312": {
|
484 |
+
"content": "[unused27]",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": true,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": false
|
490 |
+
},
|
491 |
+
"50313": {
|
492 |
+
"content": "[unused28]",
|
493 |
+
"lstrip": false,
|
494 |
+
"normalized": true,
|
495 |
+
"rstrip": false,
|
496 |
+
"single_word": false,
|
497 |
+
"special": false
|
498 |
+
},
|
499 |
+
"50314": {
|
500 |
+
"content": "[unused29]",
|
501 |
+
"lstrip": false,
|
502 |
+
"normalized": true,
|
503 |
+
"rstrip": false,
|
504 |
+
"single_word": false,
|
505 |
+
"special": false
|
506 |
+
},
|
507 |
+
"50315": {
|
508 |
+
"content": "[unused30]",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": true,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false,
|
513 |
+
"special": false
|
514 |
+
},
|
515 |
+
"50316": {
|
516 |
+
"content": "[unused31]",
|
517 |
+
"lstrip": false,
|
518 |
+
"normalized": true,
|
519 |
+
"rstrip": false,
|
520 |
+
"single_word": false,
|
521 |
+
"special": false
|
522 |
+
},
|
523 |
+
"50317": {
|
524 |
+
"content": "[unused32]",
|
525 |
+
"lstrip": false,
|
526 |
+
"normalized": true,
|
527 |
+
"rstrip": false,
|
528 |
+
"single_word": false,
|
529 |
+
"special": false
|
530 |
+
},
|
531 |
+
"50318": {
|
532 |
+
"content": "[unused33]",
|
533 |
+
"lstrip": false,
|
534 |
+
"normalized": true,
|
535 |
+
"rstrip": false,
|
536 |
+
"single_word": false,
|
537 |
+
"special": false
|
538 |
+
},
|
539 |
+
"50319": {
|
540 |
+
"content": "[unused34]",
|
541 |
+
"lstrip": false,
|
542 |
+
"normalized": true,
|
543 |
+
"rstrip": false,
|
544 |
+
"single_word": false,
|
545 |
+
"special": false
|
546 |
+
},
|
547 |
+
"50320": {
|
548 |
+
"content": "[unused35]",
|
549 |
+
"lstrip": false,
|
550 |
+
"normalized": true,
|
551 |
+
"rstrip": false,
|
552 |
+
"single_word": false,
|
553 |
+
"special": false
|
554 |
+
},
|
555 |
+
"50321": {
|
556 |
+
"content": "[unused36]",
|
557 |
+
"lstrip": false,
|
558 |
+
"normalized": true,
|
559 |
+
"rstrip": false,
|
560 |
+
"single_word": false,
|
561 |
+
"special": false
|
562 |
+
},
|
563 |
+
"50322": {
|
564 |
+
"content": "[unused37]",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": true,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false,
|
569 |
+
"special": false
|
570 |
+
},
|
571 |
+
"50323": {
|
572 |
+
"content": "[unused38]",
|
573 |
+
"lstrip": false,
|
574 |
+
"normalized": true,
|
575 |
+
"rstrip": false,
|
576 |
+
"single_word": false,
|
577 |
+
"special": false
|
578 |
+
},
|
579 |
+
"50324": {
|
580 |
+
"content": "[unused39]",
|
581 |
+
"lstrip": false,
|
582 |
+
"normalized": true,
|
583 |
+
"rstrip": false,
|
584 |
+
"single_word": false,
|
585 |
+
"special": false
|
586 |
+
},
|
587 |
+
"50325": {
|
588 |
+
"content": "[unused40]",
|
589 |
+
"lstrip": false,
|
590 |
+
"normalized": true,
|
591 |
+
"rstrip": false,
|
592 |
+
"single_word": false,
|
593 |
+
"special": false
|
594 |
+
},
|
595 |
+
"50326": {
|
596 |
+
"content": "[unused41]",
|
597 |
+
"lstrip": false,
|
598 |
+
"normalized": true,
|
599 |
+
"rstrip": false,
|
600 |
+
"single_word": false,
|
601 |
+
"special": false
|
602 |
+
},
|
603 |
+
"50327": {
|
604 |
+
"content": "[unused42]",
|
605 |
+
"lstrip": false,
|
606 |
+
"normalized": true,
|
607 |
+
"rstrip": false,
|
608 |
+
"single_word": false,
|
609 |
+
"special": false
|
610 |
+
},
|
611 |
+
"50328": {
|
612 |
+
"content": "[unused43]",
|
613 |
+
"lstrip": false,
|
614 |
+
"normalized": true,
|
615 |
+
"rstrip": false,
|
616 |
+
"single_word": false,
|
617 |
+
"special": false
|
618 |
+
},
|
619 |
+
"50329": {
|
620 |
+
"content": "[unused44]",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": true,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false,
|
625 |
+
"special": false
|
626 |
+
},
|
627 |
+
"50330": {
|
628 |
+
"content": "[unused45]",
|
629 |
+
"lstrip": false,
|
630 |
+
"normalized": true,
|
631 |
+
"rstrip": false,
|
632 |
+
"single_word": false,
|
633 |
+
"special": false
|
634 |
+
},
|
635 |
+
"50331": {
|
636 |
+
"content": "[unused46]",
|
637 |
+
"lstrip": false,
|
638 |
+
"normalized": true,
|
639 |
+
"rstrip": false,
|
640 |
+
"single_word": false,
|
641 |
+
"special": false
|
642 |
+
},
|
643 |
+
"50332": {
|
644 |
+
"content": "[unused47]",
|
645 |
+
"lstrip": false,
|
646 |
+
"normalized": true,
|
647 |
+
"rstrip": false,
|
648 |
+
"single_word": false,
|
649 |
+
"special": false
|
650 |
+
},
|
651 |
+
"50333": {
|
652 |
+
"content": "[unused48]",
|
653 |
+
"lstrip": false,
|
654 |
+
"normalized": true,
|
655 |
+
"rstrip": false,
|
656 |
+
"single_word": false,
|
657 |
+
"special": false
|
658 |
+
},
|
659 |
+
"50334": {
|
660 |
+
"content": "[unused49]",
|
661 |
+
"lstrip": false,
|
662 |
+
"normalized": true,
|
663 |
+
"rstrip": false,
|
664 |
+
"single_word": false,
|
665 |
+
"special": false
|
666 |
+
},
|
667 |
+
"50335": {
|
668 |
+
"content": "[unused50]",
|
669 |
+
"lstrip": false,
|
670 |
+
"normalized": true,
|
671 |
+
"rstrip": false,
|
672 |
+
"single_word": false,
|
673 |
+
"special": false
|
674 |
+
},
|
675 |
+
"50336": {
|
676 |
+
"content": "[unused51]",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": true,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false,
|
681 |
+
"special": false
|
682 |
+
},
|
683 |
+
"50337": {
|
684 |
+
"content": "[unused52]",
|
685 |
+
"lstrip": false,
|
686 |
+
"normalized": true,
|
687 |
+
"rstrip": false,
|
688 |
+
"single_word": false,
|
689 |
+
"special": false
|
690 |
+
},
|
691 |
+
"50338": {
|
692 |
+
"content": "[unused53]",
|
693 |
+
"lstrip": false,
|
694 |
+
"normalized": true,
|
695 |
+
"rstrip": false,
|
696 |
+
"single_word": false,
|
697 |
+
"special": false
|
698 |
+
},
|
699 |
+
"50339": {
|
700 |
+
"content": "[unused54]",
|
701 |
+
"lstrip": false,
|
702 |
+
"normalized": true,
|
703 |
+
"rstrip": false,
|
704 |
+
"single_word": false,
|
705 |
+
"special": false
|
706 |
+
},
|
707 |
+
"50340": {
|
708 |
+
"content": "[unused55]",
|
709 |
+
"lstrip": false,
|
710 |
+
"normalized": true,
|
711 |
+
"rstrip": false,
|
712 |
+
"single_word": false,
|
713 |
+
"special": false
|
714 |
+
},
|
715 |
+
"50341": {
|
716 |
+
"content": "[unused56]",
|
717 |
+
"lstrip": false,
|
718 |
+
"normalized": true,
|
719 |
+
"rstrip": false,
|
720 |
+
"single_word": false,
|
721 |
+
"special": false
|
722 |
+
},
|
723 |
+
"50342": {
|
724 |
+
"content": "[unused57]",
|
725 |
+
"lstrip": false,
|
726 |
+
"normalized": true,
|
727 |
+
"rstrip": false,
|
728 |
+
"single_word": false,
|
729 |
+
"special": false
|
730 |
+
},
|
731 |
+
"50343": {
|
732 |
+
"content": "[unused58]",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": true,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false,
|
737 |
+
"special": false
|
738 |
+
},
|
739 |
+
"50344": {
|
740 |
+
"content": "[unused59]",
|
741 |
+
"lstrip": false,
|
742 |
+
"normalized": true,
|
743 |
+
"rstrip": false,
|
744 |
+
"single_word": false,
|
745 |
+
"special": false
|
746 |
+
},
|
747 |
+
"50345": {
|
748 |
+
"content": "[unused60]",
|
749 |
+
"lstrip": false,
|
750 |
+
"normalized": true,
|
751 |
+
"rstrip": false,
|
752 |
+
"single_word": false,
|
753 |
+
"special": false
|
754 |
+
},
|
755 |
+
"50346": {
|
756 |
+
"content": "[unused61]",
|
757 |
+
"lstrip": false,
|
758 |
+
"normalized": true,
|
759 |
+
"rstrip": false,
|
760 |
+
"single_word": false,
|
761 |
+
"special": false
|
762 |
+
},
|
763 |
+
"50347": {
|
764 |
+
"content": "[unused62]",
|
765 |
+
"lstrip": false,
|
766 |
+
"normalized": true,
|
767 |
+
"rstrip": false,
|
768 |
+
"single_word": false,
|
769 |
+
"special": false
|
770 |
+
},
|
771 |
+
"50348": {
|
772 |
+
"content": "[unused63]",
|
773 |
+
"lstrip": false,
|
774 |
+
"normalized": true,
|
775 |
+
"rstrip": false,
|
776 |
+
"single_word": false,
|
777 |
+
"special": false
|
778 |
+
},
|
779 |
+
"50349": {
|
780 |
+
"content": "[unused64]",
|
781 |
+
"lstrip": false,
|
782 |
+
"normalized": true,
|
783 |
+
"rstrip": false,
|
784 |
+
"single_word": false,
|
785 |
+
"special": false
|
786 |
+
},
|
787 |
+
"50350": {
|
788 |
+
"content": "[unused65]",
|
789 |
+
"lstrip": false,
|
790 |
+
"normalized": true,
|
791 |
+
"rstrip": false,
|
792 |
+
"single_word": false,
|
793 |
+
"special": false
|
794 |
+
},
|
795 |
+
"50351": {
|
796 |
+
"content": "[unused66]",
|
797 |
+
"lstrip": false,
|
798 |
+
"normalized": true,
|
799 |
+
"rstrip": false,
|
800 |
+
"single_word": false,
|
801 |
+
"special": false
|
802 |
+
},
|
803 |
+
"50352": {
|
804 |
+
"content": "[unused67]",
|
805 |
+
"lstrip": false,
|
806 |
+
"normalized": true,
|
807 |
+
"rstrip": false,
|
808 |
+
"single_word": false,
|
809 |
+
"special": false
|
810 |
+
},
|
811 |
+
"50353": {
|
812 |
+
"content": "[unused68]",
|
813 |
+
"lstrip": false,
|
814 |
+
"normalized": true,
|
815 |
+
"rstrip": false,
|
816 |
+
"single_word": false,
|
817 |
+
"special": false
|
818 |
+
},
|
819 |
+
"50354": {
|
820 |
+
"content": "[unused69]",
|
821 |
+
"lstrip": false,
|
822 |
+
"normalized": true,
|
823 |
+
"rstrip": false,
|
824 |
+
"single_word": false,
|
825 |
+
"special": false
|
826 |
+
},
|
827 |
+
"50355": {
|
828 |
+
"content": "[unused70]",
|
829 |
+
"lstrip": false,
|
830 |
+
"normalized": true,
|
831 |
+
"rstrip": false,
|
832 |
+
"single_word": false,
|
833 |
+
"special": false
|
834 |
+
},
|
835 |
+
"50356": {
|
836 |
+
"content": "[unused71]",
|
837 |
+
"lstrip": false,
|
838 |
+
"normalized": true,
|
839 |
+
"rstrip": false,
|
840 |
+
"single_word": false,
|
841 |
+
"special": false
|
842 |
+
},
|
843 |
+
"50357": {
|
844 |
+
"content": "[unused72]",
|
845 |
+
"lstrip": false,
|
846 |
+
"normalized": true,
|
847 |
+
"rstrip": false,
|
848 |
+
"single_word": false,
|
849 |
+
"special": false
|
850 |
+
},
|
851 |
+
"50358": {
|
852 |
+
"content": "[unused73]",
|
853 |
+
"lstrip": false,
|
854 |
+
"normalized": true,
|
855 |
+
"rstrip": false,
|
856 |
+
"single_word": false,
|
857 |
+
"special": false
|
858 |
+
},
|
859 |
+
"50359": {
|
860 |
+
"content": "[unused74]",
|
861 |
+
"lstrip": false,
|
862 |
+
"normalized": true,
|
863 |
+
"rstrip": false,
|
864 |
+
"single_word": false,
|
865 |
+
"special": false
|
866 |
+
},
|
867 |
+
"50360": {
|
868 |
+
"content": "[unused75]",
|
869 |
+
"lstrip": false,
|
870 |
+
"normalized": true,
|
871 |
+
"rstrip": false,
|
872 |
+
"single_word": false,
|
873 |
+
"special": false
|
874 |
+
},
|
875 |
+
"50361": {
|
876 |
+
"content": "[unused76]",
|
877 |
+
"lstrip": false,
|
878 |
+
"normalized": true,
|
879 |
+
"rstrip": false,
|
880 |
+
"single_word": false,
|
881 |
+
"special": false
|
882 |
+
},
|
883 |
+
"50362": {
|
884 |
+
"content": "[unused77]",
|
885 |
+
"lstrip": false,
|
886 |
+
"normalized": true,
|
887 |
+
"rstrip": false,
|
888 |
+
"single_word": false,
|
889 |
+
"special": false
|
890 |
+
},
|
891 |
+
"50363": {
|
892 |
+
"content": "[unused78]",
|
893 |
+
"lstrip": false,
|
894 |
+
"normalized": true,
|
895 |
+
"rstrip": false,
|
896 |
+
"single_word": false,
|
897 |
+
"special": false
|
898 |
+
},
|
899 |
+
"50364": {
|
900 |
+
"content": "[unused79]",
|
901 |
+
"lstrip": false,
|
902 |
+
"normalized": true,
|
903 |
+
"rstrip": false,
|
904 |
+
"single_word": false,
|
905 |
+
"special": false
|
906 |
+
},
|
907 |
+
"50365": {
|
908 |
+
"content": "[unused80]",
|
909 |
+
"lstrip": false,
|
910 |
+
"normalized": true,
|
911 |
+
"rstrip": false,
|
912 |
+
"single_word": false,
|
913 |
+
"special": false
|
914 |
+
},
|
915 |
+
"50366": {
|
916 |
+
"content": "[unused81]",
|
917 |
+
"lstrip": false,
|
918 |
+
"normalized": true,
|
919 |
+
"rstrip": false,
|
920 |
+
"single_word": false,
|
921 |
+
"special": false
|
922 |
+
},
|
923 |
+
"50367": {
|
924 |
+
"content": "[unused82]",
|
925 |
+
"lstrip": false,
|
926 |
+
"normalized": true,
|
927 |
+
"rstrip": false,
|
928 |
+
"single_word": false,
|
929 |
+
"special": false
|
930 |
+
}
|
931 |
+
},
|
932 |
+
"clean_up_tokenization_spaces": true,
|
933 |
+
"cls_token": "[CLS]",
|
934 |
+
"extra_special_tokens": {},
|
935 |
+
"mask_token": "[MASK]",
|
936 |
+
"model_input_names": [
|
937 |
+
"input_ids",
|
938 |
+
"attention_mask"
|
939 |
+
],
|
940 |
+
"model_max_length": 8192,
|
941 |
+
"pad_token": "[PAD]",
|
942 |
+
"sep_token": "[SEP]",
|
943 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
944 |
+
"unk_token": "[UNK]"
|
945 |
+
}
|