Update README.md
Browse files
README.md
CHANGED
|
@@ -536,4 +536,83 @@ for y in result:
|
|
| 536 |
# {"store_name": "Trader Joe's"}
|
| 537 |
# {"names": ["John", "Mary", "James"]}
|
| 538 |
# {"names": ["JOHN", "MARY", "JAMES"], "female_names": ["MARY"]}
|
| 539 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
# {"store_name": "Trader Joe's"}
|
| 537 |
# {"names": ["John", "Mary", "James"]}
|
| 538 |
# {"names": ["JOHN", "MARY", "JAMES"], "female_names": ["MARY"]}
|
| 539 |
+
```
|
| 540 |
+
|
| 541 |
+
## Template Generation
|
| 542 |
+
If you want to convert existing schema files from other formats (e.g. XML or YAML) or start from an example, NuExtract 2 models can automatically generate a template for you.
|
| 543 |
+
|
| 544 |
+
For example, convert an XML schema into a NuExtract template:
|
| 545 |
+
```python
|
| 546 |
+
# Generate a NuExtract template from `description` (e.g. an XML schema or a
# natural-language request) and return the first generated result.
# `prepare_inputs`, `nuextract_generate`, `model` and `tokenizer` are not
# defined in this snippet - presumably they come from earlier README sections.
def generate_template(description):
|
| 547 |
+
# The single description is wrapped in a list before being handed to prepare_inputs.
input_messages = [description]
|
| 548 |
+
input_content = prepare_inputs(
|
| 549 |
+
messages=input_messages,
|
| 550 |
+
# No images are supplied for template generation.
image_paths=[],
|
| 551 |
+
tokenizer=tokenizer,
|
| 552 |
+
)
|
| 553 |
+
|
| 554 |
+
# NOTE(review): sampling is enabled, so repeated calls may yield different
# templates; max_new_tokens=256 presumably caps the output length - confirm
# that large templates are not truncated.
generation_config = {"do_sample": True, "temperature": 0.4, "max_new_tokens": 256}
|
| 555 |
+
|
| 556 |
+
# Inference only: gradient tracking is disabled around the generation call.
with torch.no_grad():
|
| 557 |
+
result = nuextract_generate(
|
| 558 |
+
model=model,
|
| 559 |
+
tokenizer=tokenizer,
|
| 560 |
+
prompts=input_content['prompts'],
|
| 561 |
+
pixel_values_list=input_content['pixel_values_list'],
|
| 562 |
+
num_patches_list=input_content['num_patches_list'],
|
| 563 |
+
generation_config=generation_config
|
| 564 |
+
)
|
| 565 |
+
# Only one message was submitted, so only the first result is returned.
return result[0]
|
| 566 |
+
|
| 567 |
+
xml_template = """<SportResult>
|
| 568 |
+
<Date></Date>
|
| 569 |
+
<Sport></Sport>
|
| 570 |
+
<Venue></Venue>
|
| 571 |
+
<HomeTeam></HomeTeam>
|
| 572 |
+
<AwayTeam></AwayTeam>
|
| 573 |
+
<HomeScore></HomeScore>
|
| 574 |
+
<AwayScore></AwayScore>
|
| 575 |
+
<TopScorer></TopScorer>
|
| 576 |
+
</SportResult>"""
|
| 577 |
+
result = generate_template(xml_template)
|
| 578 |
+
|
| 579 |
+
print(result)
|
| 580 |
+
# {
|
| 581 |
+
# "SportResult": {
|
| 582 |
+
# "Date": "date-time",
|
| 583 |
+
# "Sport": "verbatim-string",
|
| 584 |
+
# "Venue": "verbatim-string",
|
| 585 |
+
# "HomeTeam": "verbatim-string",
|
| 586 |
+
# "AwayTeam": "verbatim-string",
|
| 587 |
+
# "HomeScore": "integer",
|
| 588 |
+
# "AwayScore": "integer",
|
| 589 |
+
# "TopScorer": "verbatim-string"
|
| 590 |
+
# }
|
| 591 |
+
# }
|
| 592 |
+
```
|
| 593 |
+
|
| 594 |
+
For example, generate a template from a natural-language description:
|
| 595 |
+
```python
|
| 596 |
+
text = """Give me relevant info about startup companies mentioned."""
|
| 597 |
+
result = generate_template(text)
|
| 598 |
+
|
| 599 |
+
print(result)
|
| 600 |
+
# {
|
| 601 |
+
# "Startup_Companies": [
|
| 602 |
+
# {
|
| 603 |
+
# "Name": "verbatim-string",
|
| 604 |
+
# "Products": [
|
| 605 |
+
# "string"
|
| 606 |
+
# ],
|
| 607 |
+
# "Location": "verbatim-string",
|
| 608 |
+
# "Company_Type": [
|
| 609 |
+
# "Technology",
|
| 610 |
+
# "Finance",
|
| 611 |
+
# "Health",
|
| 612 |
+
# "Education",
|
| 613 |
+
# "Other"
|
| 614 |
+
# ]
|
| 615 |
+
# }
|
| 616 |
+
# ]
|
| 617 |
+
# }
|
| 618 |
+
```
|