bprec

منابع:

برای بارگذاری این مجموعه داده در TFDS از دستور زیر استفاده کنید:

ds = tfds.load('huggingface:bprec')
  • شرح :
Dataset consisting of Polish language texts annotated to recognize brand-product relations.
  • مجوز : مجوز شناخته شده ای وجود ندارد
  • نسخه : 1.1.0
  • تقسیم ها :
تقسیم    تعداد نمونه‌ها
'banking' 561
'cosmetics' 2384
'electro' 382
'tele' 2391
  • ویژگی‌ها :
{
    "id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "text": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "ner": {
        "feature": {
            "source": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            },
            "target": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            }
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    }
}

all

برای بارگذاری این مجموعه داده در TFDS از دستور زیر استفاده کنید:

ds = tfds.load('huggingface:bprec/all')
  • شرح :
Dataset consisting of Polish language texts annotated to recognize brand-product relations.
  • مجوز : مجوز شناخته شده ای وجود ندارد
  • نسخه : 1.1.0
  • تقسیم ها :
تقسیم    تعداد نمونه‌ها
'train' 5718
  • ویژگی‌ها :
{
    "id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "category": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "text": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "ner": {
        "feature": {
            "source": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            },
            "target": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            }
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    }
}

tele

برای بارگذاری این مجموعه داده در TFDS از دستور زیر استفاده کنید:

ds = tfds.load('huggingface:bprec/tele')
  • شرح :
Dataset consisting of Polish language texts annotated to recognize brand-product relations.
  • مجوز : مجوز شناخته شده ای وجود ندارد
  • نسخه : 1.1.0
  • تقسیم ها :
تقسیم    تعداد نمونه‌ها
'train' 2391
  • ویژگی‌ها :
{
    "id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "category": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "text": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "ner": {
        "feature": {
            "source": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            },
            "target": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            }
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    }
}

electro

برای بارگذاری این مجموعه داده در TFDS از دستور زیر استفاده کنید:

ds = tfds.load('huggingface:bprec/electro')
  • شرح :
Dataset consisting of Polish language texts annotated to recognize brand-product relations.
  • مجوز : مجوز شناخته شده ای وجود ندارد
  • نسخه : 1.1.0
  • تقسیم ها :
تقسیم    تعداد نمونه‌ها
'train' 382
  • ویژگی‌ها :
{
    "id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "category": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "text": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "ner": {
        "feature": {
            "source": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            },
            "target": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            }
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    }
}

cosmetics

برای بارگذاری این مجموعه داده در TFDS از دستور زیر استفاده کنید:

ds = tfds.load('huggingface:bprec/cosmetics')
  • شرح :
Dataset consisting of Polish language texts annotated to recognize brand-product relations.
  • مجوز : مجوز شناخته شده ای وجود ندارد
  • نسخه : 1.1.0
  • تقسیم ها :
تقسیم    تعداد نمونه‌ها
'train' 2384
  • ویژگی‌ها :
{
    "id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "category": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "text": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "ner": {
        "feature": {
            "source": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            },
            "target": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            }
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    }
}

banking

برای بارگذاری این مجموعه داده در TFDS از دستور زیر استفاده کنید:

ds = tfds.load('huggingface:bprec/banking')
  • شرح :
Dataset consisting of Polish language texts annotated to recognize brand-product relations.
  • مجوز : مجوز شناخته شده ای وجود ندارد
  • نسخه : 1.1.0
  • تقسیم ها :
تقسیم    تعداد نمونه‌ها
'train' 561
  • ویژگی‌ها :
{
    "id": {
        "dtype": "int32",
        "id": null,
        "_type": "Value"
    },
    "category": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "text": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "ner": {
        "feature": {
            "source": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            },
            "target": {
                "from": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "text": {
                    "dtype": "string",
                    "id": null,
                    "_type": "Value"
                },
                "to": {
                    "dtype": "int32",
                    "id": null,
                    "_type": "Value"
                },
                "type": {
                    "num_classes": 10,
                    "names": [
                        "PRODUCT_NAME",
                        "PRODUCT_NAME_IMP",
                        "PRODUCT_NO_BRAND",
                        "BRAND_NAME",
                        "BRAND_NAME_IMP",
                        "VERSION",
                        "PRODUCT_ADJ",
                        "BRAND_ADJ",
                        "LOCATION",
                        "LOCATION_IMP"
                    ],
                    "names_file": null,
                    "id": null,
                    "_type": "ClassLabel"
                }
            }
        },
        "length": -1,
        "id": null,
        "_type": "Sequence"
    }
}