| | |
| | |  |
| | | |
| | | ## Quick start |
| | | To run the baseline, first you need to install FunASR and ModelScope. ([installation](https://alibaba-damo-academy.github.io/FunASR/en/installation.html)) |
| | | To run the baseline, first you need to install FunASR and ModelScope. ([installation](https://github.com/alibaba-damo-academy/FunASR#installation)) |
| | | There are two startup scripts, `run.sh` for training and evaluating on the old eval and test sets, and `run_m2met_2023_infer.sh` for inference on the new test set of the Multi-Channel Multi-Party Meeting Transcription 2.0 ([M2MeT2.0](https://alibaba-damo-academy.github.io/FunASR/m2met2/index.html)) Challenge. |
| | | Before running `run.sh`, you must manually download and unpack the [AliMeeting](http://www.openslr.org/119/) corpus and place it in the `./dataset` directory: |
| | | ```shell |
| | |
| | | |
| | | ## 竞赛报名 |
| | | |
| | | 来自学术界和工业界的有意向参赛者均应在2023年5月22日及之前填写下方的谷歌表单。同时欢迎广大参赛者加入[官方交流微信群](https://alibaba-damo-academy.github.io/FunASR/m2met2_cn/%E8%81%94%E7%B3%BB%E6%96%B9%E5%BC%8F.html)交流并及时获取竞赛最新消息： |
| | | 来自学术界和工业界的有意向参赛者均应在2023年5月22日及之前填写下方的谷歌表单，如果无法访问可以通过邮件（m2met.alimeeting@gmail.com）报名，邮件需要包含参赛队名，队员，所属机构以及参加赛道等信息。同时欢迎广大参赛者加入[官方交流微信群](https://alibaba-damo-academy.github.io/FunASR/m2met2_cn/%E8%81%94%E7%B3%BB%E6%96%B9%E5%BC%8F.html)交流并及时获取竞赛最新消息： |
| | | |
| | | [M2MeT2.0报名](https://docs.google.com/forms/d/e/1FAIpQLSf77T9vAl7Ym-u5g8gXu18SBofoWRaFShBo26Ym0-HDxHW9PQ/viewform?usp=sf_link) |
| | | |
| | |
| | | Search.setIndex({"docnames": ["index", "\u57fa\u7ebf", "\u6570\u636e\u96c6", "\u7b80\u4ecb", "\u7ec4\u59d4\u4f1a", "\u8054\u7cfb\u65b9\u5f0f", "\u89c4\u5219", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30"], "filenames": ["index.rst", "\u57fa\u7ebf.md", "\u6570\u636e\u96c6.md", "\u7b80\u4ecb.md", "\u7ec4\u59d4\u4f1a.md", "\u8054\u7cfb\u65b9\u5f0f.md", "\u89c4\u5219.md", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30.md"], "titles": ["ASRU 2023 \u591a\u901a\u9053\u591a\u65b9\u4f1a\u8bae\u8f6c\u5f55\u6311\u6218 2.0", "\u57fa\u7ebf", "\u6570\u636e\u96c6", "\u7b80\u4ecb", "\u7ec4\u59d4\u4f1a", "\u8054\u7cfb\u65b9\u5f0f", "\u7ade\u8d5b\u89c4\u5219", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30"], "terms": {"m2met": [0, 1, 3, 5, 7], "asru2023": [0, 3], "m2met2": [0, 3, 5, 7], "funasr": 1, "sa": 1, "asr": [1, 3, 7], "speakerencod": 1, "modelscop": [1, 7], "instal": 1, "run": 1, "sh": 1, "run_m2met_2023_inf": 1, "alimeet": [1, 5, 7], "dataset": 1, "eval_ali_far": 1, "eval_ali_near": 1, "test_ali_far": 1, "test_ali_near": 1, "train_ali_far": 1, "train_ali_near": 1, "test_2023_ali_far": 1, "16": [1, 3], "wav": 1, "scp": 1, "wav_raw": 1, "segment": 1, "utt2spk": 1, "spk2utt": 1, "data": 1, "aishel": [2, 7], "cn": [2, 4, 7], "celeb": [2, 7], "test": [2, 6, 7], "2023": [2, 3, 6, 7], "118": 2, "75": 2, "104": 2, "train": 2, "eval": [2, 6], "10": [2, 3, 7], "212": 2, "15": 2, "30": 2, "456": 2, "25": 2, "13": 2, "55": 2, "42": 2, "27": 2, "34": 2, "76": 2, "20": [2, 3], "textgrid": 2, "id": 2, "openslr": 2, "baselin": 2, "automat": 3, "speech": 3, "recognit": 3, "speaker": 3, "diariz": 3, "rich": 3, "transcript": 3, "evalu": 3, "chime": 3, "comput": 3, "hear": 3, "in": 3, "multisourc": 3, "environ": 3, "misp": 3, "multimod": 3, "inform": 3, "base": 3, "process": 3, "multi": 3, "channel": 3, "parti": 3, "meet": 3, "iassp2022": 3, "asru": 3, "29": 3, "11": 3, "22": 3, "26": 3, "session": 3, "12": 3, "workshop": 3, "challeng": 3, "lxie": 4, "nwpu": 4, "edu": 4, "kong": 4, "aik": 4, 
"lee": 4, "star": 4, "kongaik": 4, "ieee": 4, "org": 4, "zhiji": 4, "yzj": 4, "alibaba": 4, "inc": 4, "com": [4, 5], "sli": 4, "zsl": 4, "yanminqian": 4, "sjtu": 4, "zhuc": 4, "microsoft": 4, "wujian": 4, "ceo": 4, "buhui": 4, "aishelldata": 4, "gmail": 5, "cpcer": [6, 7], "las": 6, "rnnt": 6, "transform": 6, "aishell4": 7, "vad": 7, "cer": 7, "ins": 7, "sub": 7, "del": 7, "text": 7, "frac": 7, "mathcal": 7, "n_": 7, "total": 7, "time": 7, "100": 7, "hug": 7, "face": 7}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"asru": 0, "2023": 0, "alimeet": 2, "aoe": 3}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"ASRU 2023 \u591a\u901a\u9053\u591a\u65b9\u4f1a\u8bae\u8f6c\u5f55\u6311\u6218 2.0": [[0, "asru-2023-2-0"]], "\u76ee\u5f55:": [[0, null]], "\u57fa\u7ebf": [[1, "id1"]], "\u57fa\u7ebf\u6982\u8ff0": [[1, "id2"]], "\u5feb\u901f\u5f00\u59cb": [[1, "id3"]], "\u57fa\u7ebf\u7ed3\u679c": [[1, "id4"]], "\u6570\u636e\u96c6": [[2, "id1"]], "\u6570\u636e\u96c6\u6982\u8ff0": [[2, "id2"]], "Alimeeting\u6570\u636e\u96c6\u4ecb\u7ecd": [[2, "alimeeting"]], "\u83b7\u53d6\u6570\u636e": [[2, "id3"]], "\u7b80\u4ecb": [[3, "id1"]], "\u7ade\u8d5b\u4ecb\u7ecd": [[3, "id2"]], "\u65f6\u95f4\u5b89\u6392(AOE\u65f6\u95f4)": [[3, "aoe"]], "\u7ade\u8d5b\u62a5\u540d": [[3, "id3"]], "\u7ec4\u59d4\u4f1a": [[4, "id1"]], "\u8054\u7cfb\u65b9\u5f0f": [[5, "id1"]], "\u7ade\u8d5b\u89c4\u5219": [[6, "id1"]], "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30": [[7, "id1"]], "\u8bf4\u8bdd\u4eba\u76f8\u5173\u7684\u8bed\u97f3\u8bc6\u522b": [[7, "id2"]], "\u8bc4\u4f30\u65b9\u6cd5": [[7, "id3"]], "\u5b50\u8d5b\u9053\u8bbe\u7f6e": [[7, "id4"]], "\u5b50\u8d5b\u9053\u4e00 (\u9650\u5b9a\u8bad\u7ec3\u6570\u636e):": [[7, "id5"]], 
"\u5b50\u8d5b\u9053\u4e8c (\u5f00\u653e\u8bad\u7ec3\u6570\u636e):": [[7, "id6"]]}, "indexentries": {}}) |
| | | Search.setIndex({"docnames": ["index", "\u57fa\u7ebf", "\u6570\u636e\u96c6", "\u7b80\u4ecb", "\u7ec4\u59d4\u4f1a", "\u8054\u7cfb\u65b9\u5f0f", "\u89c4\u5219", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30"], "filenames": ["index.rst", "\u57fa\u7ebf.md", "\u6570\u636e\u96c6.md", "\u7b80\u4ecb.md", "\u7ec4\u59d4\u4f1a.md", "\u8054\u7cfb\u65b9\u5f0f.md", "\u89c4\u5219.md", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30.md"], "titles": ["ASRU 2023 \u591a\u901a\u9053\u591a\u65b9\u4f1a\u8bae\u8f6c\u5f55\u6311\u6218 2.0", "\u57fa\u7ebf", "\u6570\u636e\u96c6", "\u7b80\u4ecb", "\u7ec4\u59d4\u4f1a", "\u8054\u7cfb\u65b9\u5f0f", "\u7ade\u8d5b\u89c4\u5219", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30"], "terms": {"m2met": [0, 1, 3, 5, 7], "asru2023": [0, 3], "m2met2": [0, 3, 5, 7], "funasr": 1, "sa": 1, "asr": [1, 3, 7], "speakerencod": 1, "modelscop": [1, 7], "instal": 1, "run": 1, "sh": 1, "run_m2met_2023_inf": 1, "alimeet": [1, 3, 5, 7], "dataset": 1, "eval_ali_far": 1, "eval_ali_near": 1, "test_ali_far": 1, "test_ali_near": 1, "train_ali_far": 1, "train_ali_near": 1, "test_2023_ali_far": 1, "16": [1, 3], "wav": 1, "scp": 1, "wav_raw": 1, "segment": 1, "utt2spk": 1, "spk2utt": 1, "data": 1, "aishel": [2, 7], "cn": [2, 4, 7], "celeb": [2, 7], "test": [2, 6, 7], "2023": [2, 3, 6, 7], "118": 2, "75": 2, "104": 2, "train": 2, "eval": [2, 6], "10": [2, 3, 7], "212": 2, "15": 2, "30": 2, "456": 2, "25": 2, "13": 2, "55": 2, "42": 2, "27": 2, "34": 2, "76": 2, "20": [2, 3], "textgrid": 2, "id": 2, "openslr": 2, "baselin": 2, "automat": 3, "speech": 3, "recognit": 3, "speaker": 3, "diariz": 3, "rich": 3, "transcript": 3, "evalu": 3, "chime": 3, "comput": 3, "hear": 3, "in": 3, "multisourc": 3, "environ": 3, "misp": 3, "multimod": 3, "inform": 3, "base": 3, "process": 3, "multi": 3, "channel": 3, "parti": 3, "meet": 3, "iassp2022": 3, "asru": 3, "29": 3, "11": 3, "22": 3, "26": 3, "session": 3, "12": 3, "workshop": 3, "challeng": 3, "gmail": [3, 5], "com": [3, 4, 5], "lxie": 4, 
"nwpu": 4, "edu": 4, "kong": 4, "aik": 4, "lee": 4, "star": 4, "kongaik": 4, "ieee": 4, "org": 4, "zhiji": 4, "yzj": 4, "alibaba": 4, "inc": 4, "sli": 4, "zsl": 4, "yanminqian": 4, "sjtu": 4, "zhuc": 4, "microsoft": 4, "wujian": 4, "ceo": 4, "buhui": 4, "aishelldata": 4, "cpcer": [6, 7], "las": 6, "rnnt": 6, "transform": 6, "aishell4": 7, "vad": 7, "cer": 7, "ins": 7, "sub": 7, "del": 7, "text": 7, "frac": 7, "mathcal": 7, "n_": 7, "total": 7, "time": 7, "100": 7, "hug": 7, "face": 7}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"asru": 0, "2023": 0, "alimeet": 2, "aoe": 3}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"ASRU 2023 \u591a\u901a\u9053\u591a\u65b9\u4f1a\u8bae\u8f6c\u5f55\u6311\u6218 2.0": [[0, "asru-2023-2-0"]], "\u76ee\u5f55:": [[0, null]], "\u57fa\u7ebf": [[1, "id1"]], "\u57fa\u7ebf\u6982\u8ff0": [[1, "id2"]], "\u5feb\u901f\u5f00\u59cb": [[1, "id3"]], "\u57fa\u7ebf\u7ed3\u679c": [[1, "id4"]], "\u6570\u636e\u96c6": [[2, "id1"]], "\u6570\u636e\u96c6\u6982\u8ff0": [[2, "id2"]], "Alimeeting\u6570\u636e\u96c6\u4ecb\u7ecd": [[2, "alimeeting"]], "\u83b7\u53d6\u6570\u636e": [[2, "id3"]], "\u7b80\u4ecb": [[3, "id1"]], "\u7ade\u8d5b\u4ecb\u7ecd": [[3, "id2"]], "\u65f6\u95f4\u5b89\u6392(AOE\u65f6\u95f4)": [[3, "aoe"]], "\u7ade\u8d5b\u62a5\u540d": [[3, "id3"]], "\u7ec4\u59d4\u4f1a": [[4, "id1"]], "\u8054\u7cfb\u65b9\u5f0f": [[5, "id1"]], "\u7ade\u8d5b\u89c4\u5219": [[6, "id1"]], "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30": [[7, "id1"]], "\u8bf4\u8bdd\u4eba\u76f8\u5173\u7684\u8bed\u97f3\u8bc6\u522b": [[7, "id2"]], "\u8bc4\u4f30\u65b9\u6cd5": [[7, "id3"]], "\u5b50\u8d5b\u9053\u8bbe\u7f6e": [[7, "id4"]], "\u5b50\u8d5b\u9053\u4e00 (\u9650\u5b9a\u8bad\u7ec3\u6570\u636e):": [[7, 
"id5"]], "\u5b50\u8d5b\u9053\u4e8c (\u5f00\u653e\u8bad\u7ec3\u6570\u636e):": [[7, "id6"]]}, "indexentries": {}}) |
| | |
| | | </section> |
| | | <section id="id3"> |
| | | <h2>竞赛报名<a class="headerlink" href="#id3" title="此标题的永久链接">¶</a></h2> |
| | | <p>来自学术界和工业界的有意向参赛者均应在2023年5月22日及之前填写下方的谷歌表单。同时欢迎广大参赛者加入<a class="reference external" href="https://alibaba-damo-academy.github.io/FunASR/m2met2_cn/%E8%81%94%E7%B3%BB%E6%96%B9%E5%BC%8F.html">官方交流微信群</a>交流并及时获取竞赛最新消息：</p> |
| | | <p>来自学术界和工业界的有意向参赛者均应在2023年5月22日及之前填写下方的谷歌表单，如果无法访问可以通过邮件（m2met.alimeeting@gmail.com）报名，邮件需要包含参赛队名，队员，所属机构以及参加赛道等信息。同时欢迎广大参赛者加入<a class="reference external" href="https://alibaba-damo-academy.github.io/FunASR/m2met2_cn/%E8%81%94%E7%B3%BB%E6%96%B9%E5%BC%8F.html">官方交流微信群</a>交流并及时获取竞赛最新消息：</p> |
| | | <p><a class="reference external" href="https://docs.google.com/forms/d/e/1FAIpQLSf77T9vAl7Ym-u5g8gXu18SBofoWRaFShBo26Ym0-HDxHW9PQ/viewform?usp=sf_link">M2MeT2.0报名</a></p> |
| | | <p>主办方将在3个工作日内通过电子邮件通知符合条件的参赛团队，团队必须遵守将在挑战网站上发布的挑战规则。在排名发布之前，每个参赛者必须提交一份系统描述文件，详细说明使用的方法和模型。主办方将排名前列的队伍纳入ASRU2023论文集。</p> |
| | | </section> |
| | |
| | |  |
| | | |
| | | ## 快速开始 |
| | | 首先需要安装FunASR和ModelScope. ([installation](https://alibaba-damo-academy.github.io/FunASR/en/installation.html)) |
| | | 首先需要安装FunASR和ModelScope. ([installation](https://github.com/alibaba-damo-academy/FunASR#installation)) |
| | | 基线系统有训练和测试两个脚本,`run.sh`是用于训练基线系统并在M2MeT的验证与测试集上评估的，而`run_m2met_2023_infer.sh`用于此次竞赛预备开放的全新测试集上测试同时生成符合竞赛最终提交格式的文件。 |
| | | 在运行 `run.sh`前，需要自行下载并解压[AliMeeting](http://www.openslr.org/119/)数据集并放置于`./dataset`目录下： |
| | | ```shell |
| | |
| | | |
| | | ## 竞赛报名 |
| | | |
| | | 来自学术界和工业界的有意向参赛者均应在2023年5月22日及之前填写下方的谷歌表单。同时欢迎广大参赛者加入[官方交流微信群](https://alibaba-damo-academy.github.io/FunASR/m2met2_cn/%E8%81%94%E7%B3%BB%E6%96%B9%E5%BC%8F.html)交流并及时获取竞赛最新消息： |
| | | 来自学术界和工业界的有意向参赛者均应在2023年5月22日及之前填写下方的谷歌表单，如果无法访问可以通过邮件（m2met.alimeeting@gmail.com）报名，邮件需要包含参赛队名，队员，所属机构以及参加赛道等信息。同时欢迎广大参赛者加入[官方交流微信群](https://alibaba-damo-academy.github.io/FunASR/m2met2_cn/%E8%81%94%E7%B3%BB%E6%96%B9%E5%BC%8F.html)交流并及时获取竞赛最新消息： |
| | | |
| | | [M2MeT2.0报名](https://docs.google.com/forms/d/e/1FAIpQLSf77T9vAl7Ym-u5g8gXu18SBofoWRaFShBo26Ym0-HDxHW9PQ/viewform?usp=sf_link) |
| | | |
| | |
| | | # Get Started |
| | | Speaker Attributed Automatic Speech Recognition (SA-ASR) is a task proposed to solve "who spoke what". Specifically, the goal of SA-ASR is not only to obtain multi-speaker transcriptions, but also to identify the corresponding speaker for each utterance. The method used in this example is referenced in the paper: [End-to-End Speaker-Attributed ASR with Transformer](https://www.isca-speech.org/archive/pdfs/interspeech_2021/kanda21b_interspeech.pdf). |
| | | To run this recipe, first you need to install FunASR and ModelScope. ([installation](https://alibaba-damo-academy.github.io/FunASR/en/installation.html)) |
| | | To run this recipe, first you need to install FunASR and ModelScope. ([installation](https://github.com/alibaba-damo-academy/FunASR#installation)) |
| | | There are two startup scripts, `run.sh` for training and evaluating on the old eval and test sets, and `run_m2met_2023_infer.sh` for inference on the new test set of the Multi-Channel Multi-Party Meeting Transcription 2.0 ([M2MeT2.0](https://alibaba-damo-academy.github.io/FunASR/m2met2/index.html)) Challenge. |
| | | Before running `run.sh`, you must manually download and unpack the [AliMeeting](http://www.openslr.org/119/) corpus and place it in the `./dataset` directory: |
| | | ```shell |