xpqiu.github.io/pubs.html at master · Johnson221b/xpqiu.github.io · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang="en">
	<head>
		<meta http-equiv="content-type" content="text/html; charset=utf-8" />
		<meta name="Author" content="Xipeng Qiu, "  />
		<meta name="description" content="Xipeng Qiu, Publication, " >
		<meta name="keywords" content="Xipeng Qiu, Machine Learning, Natural Language Processing, Computer Vision and Information Retrieval, Question Answering, Deep Learning" >
		<title>Xipeng Qiu's Selected Publications  </title>
		<link rel="stylesheet" href="my.css" type="text/css" />
		<style type="text/css">
			div.noshow { display: none;}
		</style>
		<script type="text/javascript">
			<!--


			/**
			 * Show or hide one info panel (abstract, review or BibTeX) belonging to a
			 * publication entry, then refresh the entry highlight and the panel classes.
			 *
			 * Elements are looked up by id: the entry itself is `articleid`, its panels
			 * are `abs_` + articleid, `rev_` + articleid and `bib_` + articleid.
			 * A panel counts as hidden while its class name contains 'noshow'.
			 *
			 * @param {string} articleid  id of the entry element.
			 * @param {string} info       which panel to toggle: 'abstract', 'review' or 'bibtex'.
			 *                            Any other value, or a panel that does not exist,
			 *                            makes the call a no-op.
			 */
			function toggleInfo(articleid,info) {

				var entry = document.getElementById(articleid);
				var abs = document.getElementById('abs_'+articleid);
				var rev = document.getElementById('rev_'+articleid);
				var bib = document.getElementById('bib_'+articleid);

				// A panel is visible when it exists and is not marked 'noshow'.
				// The same test is used for all three panels (the BibTeX panel used to be
				// compared against the exact string 'bibtex', unlike the other two).
				function isShown(panel) {
					return !!panel && panel.className.indexOf('noshow') == -1;
				}

				// Pick the panel that was asked for, together with its base class name.
				var target;
				var baseClass;
				if (abs && info == 'abstract') {
					target = abs;
					baseClass = 'abstract';
				} else if (rev && info == 'review') {
					target = rev;
					baseClass = 'review';
				} else if (bib && info == 'bibtex') {
					target = bib;
					baseClass = 'bibtex';
				} else {
					return;
				}

				// Flip visibility, but only for an element that really carries the expected class.
				if (target.className.indexOf(baseClass) != -1) {
					target.className = isShown(target) ? baseClass + ' noshow' : baseClass;
				}

				// check which panels are visible after the toggle
				var absshow = isShown(abs);
				var revshow = isShown(rev);
				var bibshow = isShown(bib);

				// highlight original entry while at least one of its panels is open
				if (entry) {
					if (revshow || absshow || bibshow) {
						entry.className = 'entry highlight show';
					} else {
						entry.className = 'entry show';
					}
				}

				// When there's a combination of abstract/review/bibtex showing, a visible panel
				// that is followed by another visible panel gets 'nextshow' for correct styling
				if (absshow) {
					abs.className = (revshow || bibshow) ? 'abstract nextshow' : 'abstract';
				}
				if (revshow) {
					rev.className = bibshow ? 'review nextshow' : 'review';
				}

			}

			/**
			 * Collapse every info panel and put every entry row back to the plain
			 * 'entry show' class.
			 *
			 * NOTE(review): reads the globals `numEntries` and `entryRows`, which are not
			 * defined in the visible part of this script; confirm they are set elsewhere
			 * before calling this function.
			 */
			function showAll(){
				// panels (abstracts, reviews, BibTeX) are closed before the rows are reset
				closeAllInfo();

				var rowIndex = 0;
				while (rowIndex < numEntries) {
					entryRows[rowIndex].className = 'entry show';
					rowIndex += 1;
				}
			}

			/**
			 * Hide every info row by appending ' noshow' to its class name, skipping rows
			 * whose class name already contains 'noshow'.
			 *
			 * NOTE(review): reads the globals `numInfo` and `infoRows`, which are not
			 * defined in the visible part of this script; confirm they are set elsewhere
			 * before calling this function.
			 */
			function closeAllInfo(){
				var rowIndex = 0;
				while (rowIndex < numInfo) {
					var row = infoRows[rowIndex];
					var alreadyHidden = row.className.indexOf('noshow') != -1;
					if (!alreadyHidden) {
						row.className += ' noshow';
					}
					rowIndex += 1;
				}
			}


			-->
		</script>

	</head>
	<body>
		<div id="pagecell1">
			<!--pagecell1-->
			<div id="top">

				<h1><a href="en.html">Xipeng Qiu</a></h1>
				Professor, School of Computer Science, Fudan University


					<div id="top-right">
						<a href="index.html"> Home </a>  | <a href="students.html">Students</a> | <a href="pubs.html">Research Topics</a>  | <a href="en.html"> English </a>
						</div>
					</div>

					<div id="left">
				<img src="xpqiu.jpg" alt="" /><!--width="285" height="325" /-->
				<p>&nbsp;</p>
				<div>
					<h2>Link</h2>
						<p ><a href="mailto:xpqiu@fudan.edu.cn" target="_blank"><i class="icon-share-alternitive"></i> Email</a></p>
						<p ><a href="http://github.com/xpqiu" target="_blank"><i class="icon-github"></i> Github</a></p>
						<p ><a href="http://weibo.com/xpqiu/" target="_blank"><i class="icon-weibo"></i> Weibo</a></p>
						<p ><a href="https://www.zhihu.com/people/xpqiu" target="_blank"><i class="icon-zhihu-square"></i> Zhihu</a></p>
						<h2>Contact</h2>
						<p class="list1" >Building 2X, No. 2005 Songhu Road, Shanghai, China</p>

					</div>
			</div>

							<div id="content">
<p> A more comprehensive publication list: <a href="https://scholar.google.com/citations?user=Pq4Yp_kAAAAJ&amp;hl=en">Google Scholar</a></p>


<h4>Survey/Overview of NLP</h4>

<ol>
<li id="qiu2020:scts-ptms" class="entry">
	<strong><font color="#0071BF">Pre-trained Models for Natural Language Processing: A Survey</font></strong>,
	 SCIENCE CHINA Technological Sciences <strong><font color="#ff6666">(SCTS)</font></strong> , Vol. 63(10), pp. 1872–1897, Science China Press, 2020.
    <a href="javascript:toggleInfo('qiu2020:scts-ptms','bibtex')">[BibTeX]</a><a href="https://doi.org/10.1007/s11431-020-1647-3">[DOI]</a><a href="https://arxiv.org/pdf/2003.08271.pdf">[PDF]</a>
     <strong><font color="#ff6666"> 《中国科学：技术科学》2021年度高影响力论文奖</font></strong>
    <div class="author"><strong>Xipeng Qiu</strong>, TianXiang Sun, Yige Xu, Yunfan Shao, Ning Dai, Xuanjing Huang.</div>

</li>
<div id="bib_qiu2020:scts-ptms" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@article{qiu2020:scts-ptms,
  author = {Xipeng Qiu and TianXiang Sun and Yige Xu and Yunfan Shao and Ning Dai and Xuanjing Huang},
  title = {Pre-trained Models for Natural Language Processing: A Survey},
  journal = {SCIENCE CHINA Technological Sciences},
  publisher = {Science China Press},
  year = {2020},
  volume = {63},
  number = {10},
  pages = {1872--1897},
  doi = {https://doi.org/10.1007/s11431-020-1647-3}
}
</pre>
</div>

<li id="Sun2022" class="entry">
	<strong><font color="#0071BF">Paradigm Shift in Natural Language Processing</font></strong>,
	 Machine Intelligence Research , Vol. 19(3), pp. 169-183, 2022.
    <a href="javascript:toggleInfo('Sun2022','bibtex')">[BibTeX]</a><a href="https://doi.org/10.1007/s11633-022-1331-6">[DOI]</a>
<div id="abs_Sun2022" class="abstract noshow">
	<b>Abstract</b>: In the era of deep learning, modeling for most natural language processing (NLP) tasks has converged into several mainstream paradigms. For example, we usually adopt the sequence labeling paradigm to solve a bundle of tasks such as POS-tagging, named entity recognition (NER), and chunking, and adopt the classification paradigm to solve tasks like sentiment analysis. With the rapid progress of pre-trained language models, recent years have witnessed a rising trend of paradigm shift, which is solving one NLP task in a new paradigm by reformulating the task. The paradigm shift has achieved great success on many tasks and is becoming a promising way to improve model performance. Moreover, some of these paradigms have shown great potential to unify a large number of NLP tasks, making it possible to build a single model to handle diverse tasks. In this paper, we review such phenomenon of paradigm shifts in recent years, highlighting several paradigms that have the potential to solve different NLP tasks.
</div>
    <a href="javascript:toggleInfo('Sun2022','abstract')">[Abstract]</a>

    <div class="author">Tian-Xiang Sun, Xiang-Yang Liu, <strong>Xi-Peng Qiu</strong>, Xuan-Jing Huang.</div>

</li>
<div id="bib_Sun2022" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@article{Sun2022,
  author = {Sun, Tian-Xiang and Liu, Xiang-Yang and Qiu, Xi-Peng and Huang, Xuan-Jing},
  title = {Paradigm Shift in Natural Language Processing},
  journal = {Machine Intelligence Research},
  year = {2022},
  volume = {19},
  number = {3},
  pages = {169--183},
  url = {https://doi.org/10.1007/s11633-022-1331-6},
  doi = {https://doi.org/10.1007/s11633-022-1331-6}
}
</pre>
</div>

<li id="lin2022survey" class="entry">
	<strong><font color="#0071BF">A survey of transformers</font></strong>,
	 AI Open , Elsevier, 2022.
    <a href="javascript:toggleInfo('lin2022survey','bibtex')">[BibTeX]</a><a href="https://doi.org/10.1016/j.aiopen.2022.10.001">[DOI]</a><a href="https://arxiv.org/pdf/2106.04554.pdf">[PDF]</a>


    <div class="author">Tianyang Lin, Yuxin Wang, Xiangyang Liu, <strong>Xipeng Qiu</strong>.</div>

</li>
<div id="bib_lin2022survey" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@article{lin2022survey,
  author = {Lin, Tianyang and Wang, Yuxin and Liu, Xiangyang and Qiu, Xipeng},
  title = {A survey of transformers},
  journal = {AI Open},
  publisher = {Elsevier},
  year = {2022},
  url = {https://arxiv.org/abs/2106.04554},
  doi = {https://doi.org/10.1016/j.aiopen.2022.10.001}
}
</pre>
</div>

</ol>

<h4>Foundation Models / Language-Model-as-a-Service (LMaaS)</h4>

<ol>

<li id="11" class="entry">
	<strong><font color="#0071BF">CPT: A Pre-Trained Unbalanced Transformer for Both Chinese Language Understanding and Generation</font></strong>,
	 SCIENCE CHINA Information Sciences <strong><font color="#ff6666">(SCIS)</font></strong> , 2022.
    <a href="javascript:toggleInfo('11','bibtex')">[BibTeX]</a><a href="https://doi.org/10.1007/s11432-021-3536-5">[DOI]</a><a href="https://arxiv.org/pdf/2109.05729.pdf">[PDF]</a>


    <div class="author">Yunfan Shao, Zhichao Geng, Yitao Liu, Junqi Dai, Hang Yan, Fei Yang, Li Zhe, Hujun Bao, <strong>Xipeng Qiu</strong>.</div>

</li>
<div id="bib_11" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@article{11,
  author = {Shao, Yunfan and Geng, Zhichao and Liu, Yitao and Dai, Junqi and Yan, Hang and Yang, Fei and Zhe, Li and Bao, Hujun and Qiu, Xipeng},
  title = {CPT: A Pre-Trained Unbalanced Transformer for Both Chinese Language Understanding and Generation},
  journal = {SCIENCE CHINA Information Sciences},
  year = {2022},
  url = {https://arxiv.org/abs/2109.05729},
  doi = {https://doi.org/10.1007/s11432-021-3536-5}
}
</pre>
</div>


<li id="sun2022black" class="entry">
	<strong><font color="#0071BF">Black-Box Tuning for Language-Model-as-a-Service</font></strong>,
	<strong><font color="#ff6666"> ICML</font></strong>, 2022.
    <a href="javascript:toggleInfo('sun2022black','bibtex')">[BibTeX]</a>
<div class="author">Tianxiang Sun, Yunfan Shao, Hong Qian, Xuanjing Huang, <strong>Xipeng Qiu</strong>.</div>
</li>
<div id="bib_sun2022black" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{sun2022black,
  author = {Sun, Tianxiang and Shao, Yunfan and Qian, Hong and Huang, Xuanjing and Qiu, Xipeng},
  title = {Black-Box Tuning for Language-Model-as-a-Service},
  booktitle = {International Conference on Machine Learning},
  year = {2022},
  volume = {162},
  pages = {20841--20855},
  url = {https://proceedings.mlr.press/v162/sun22e.html}
}
</pre>
</div>


<li id="sun2022bbtv2" class="entry">
	<strong><font color="#0071BF">BBTv2: Towards a Gradient-Free Future with Large Language Models</font></strong>,
	<strong><font color="#ff6666"> EMNLP</font></strong>, 2022.
    <a href="javascript:toggleInfo('sun2022bbtv2','bibtex')">[BibTeX]</a>
<div class="author">Tianxiang Sun, Zhengfu He, Hong Qian, Yunhua Zhou, Xuanjing Huang, <strong>Xipeng Qiu</strong>.</div>
</li>
<div id="bib_sun2022bbtv2" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{sun2022bbtv2,
  author = {Tianxiang Sun and Zhengfu He and Hong Qian and Yunhua Zhou and Xuanjing Huang and Xipeng Qiu},
  title = {BBTv2: Towards a Gradient-Free Future with Large Language Models},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  year = {2022},
  url = {https://doi.org/10.48550/arXiv.2205.11200}
}
</pre>
</div>

</ol>

<h4>Information Extraction</h4>

<ol>

<li id="yan2019tener" class="entry">
	<strong><font color="#0071BF">TENER: adapting transformer encoder for named entity recognition</font></strong>,
	 arXiv preprint arXiv:1911.04474 , 2019.
    <a href="javascript:toggleInfo('yan2019tener','bibtex')">[BibTeX]</a>


    <div class="author">Hang Yan, Bocao Deng, Xiaonan Li, <strong>Xipeng Qiu</strong>.</div>

</li>
<div id="bib_yan2019tener" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@article{yan2019tener,
  author = {Yan, Hang and Deng, Bocao and Li, Xiaonan and Qiu, Xipeng},
  title = {TENER: adapting transformer encoder for named entity recognition},
  journal = {arXiv preprint arXiv:1911.04474},
  year = {2019}
}
</pre>
</div>


<li id="li-etal-2020-flat" class="entry">
	<strong><font color="#0071BF">FLAT: Chinese NER Using Flat-Lattice Transformer</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2020.
    <a href="javascript:toggleInfo('li-etal-2020-flat','bibtex')">[BibTeX]</a><a href="https://arxiv.org/pdf/2004.11795.pdf">[PDF]</a><a href="https://github.com/LeeSureman/Flat-Lattice-Transformer">[Code]</a><a href="javascript:toggleInfo('li-etal-2020-flat','abstract')">[Abstract]</a>
<div class="author">Xiaonan Li, Hang Yan, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_li-etal-2020-flat" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{li-etal-2020-flat,
  author = {Li, Xiaonan and Yan, Hang and Qiu, Xipeng and Huang, Xuanjing},
  title = {FLAT: Chinese NER Using Flat-Lattice Transformer},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
  pages = {6836--6842},
  url = {https://www.aclweb.org/anthology/2020.acl-main.611}
}
</pre>
</div>

<div id="abs_li-etal-2020-flat" class="abstract noshow">
	<b>Abstract</b>: Recently, the character-word lattice structure has been proved to be effective for Chinese named entity recognition (NER) by incorporating the word information. However, since the lattice structure is complex and dynamic, the lattice-based models are hard to fully utilize the parallel computation of GPUs and usually have a low inference speed. In this paper, we propose FLAT: Flat-LAttice Transformer for Chinese NER, which converts the lattice structure into a flat structure consisting of spans. Each span corresponds to a character or latent word and its position in the original lattice. With the power of Transformer and well-designed position encoding, FLAT can fully leverage the lattice information and has an excellent parallel ability. Experiments on four datasets show FLAT outperforms other lexicon-based models in performance and efficiency.
</div>
<li id="li-etal-2021-accelerating" class="entry">
	<strong><font color="#0071BF">Accelerating BERT Inference for Sequence Labeling via Early-Exit</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2021.
    <a href="javascript:toggleInfo('li-etal-2021-accelerating','bibtex')">[BibTeX]</a><a href="https://aclanthology.org/2021.acl-long.16.pdf">[PDF]</a><a href="javascript:toggleInfo('li-etal-2021-accelerating','abstract')">[Abstract]</a>
<div class="author">Xiaonan Li, Yunfan Shao, Tianxiang Sun, Hang Yan, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_li-etal-2021-accelerating" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{li-etal-2021-accelerating,
  author = {Li, Xiaonan and Shao, Yunfan and Sun, Tianxiang and Yan, Hang and Qiu, Xipeng and Huang, Xuanjing},
  title = {Accelerating BERT Inference for Sequence Labeling via Early-Exit},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  year = {2021},
  pages = {189--199},
  url = {https://aclanthology.org/2021.acl-long.16}
}
</pre>
</div>

<div id="abs_li-etal-2021-accelerating" class="abstract noshow">
	<b>Abstract</b>: Both performance and efficiency are crucial factors for sequence labeling tasks in many real-world scenarios. Although the pre-trained models (PTMs) have significantly improved the performance of various sequence labeling tasks, their computational cost is expensive. To alleviate this problem, we extend the recent successful early-exit mechanism to accelerate the inference of PTMs for sequence labeling tasks. However, existing early-exit mechanisms are specifically designed for sequence-level tasks, rather than sequence labeling. In this paper, we first propose a simple extension of sentence-level early-exit for sequence labeling tasks. To further reduce the computational cost, we also propose a token-level early-exit mechanism that allows partial tokens to exit early at different layers. Considering the local dependency inherent in sequence labeling, we employed a window-based criterion to decide for a token whether or not to exit. The token-level early-exit brings the gap between training and inference, so we introduce an extra self-sampling fine-tuning stage to alleviate it. The extensive experiments on three popular sequence labeling tasks show that our approach can save up to 66%∼75% inference cost with minimal performance degradation. Compared with competitive compressed models such as DistilBERT, our approach can achieve better performance under the same speed-up ratios of 2×, 3×, and 4×.
</div>
<li id="yan-etal-2021-unified-generative" class="entry">
	<strong><font color="#0071BF">A Unified Generative Framework for Various NER Subtasks</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2021.
    <a href="javascript:toggleInfo('yan-etal-2021-unified-generative','bibtex')">[BibTeX]</a><a href="https://aclanthology.org/2021.acl-long.451.pdf">[PDF]</a><a href="javascript:toggleInfo('yan-etal-2021-unified-generative','abstract')">[Abstract]</a>
<div class="author">Hang Yan, Tao Gui, Junqi Dai, Qipeng Guo, Zheng Zhang, <strong>Xipeng Qiu</strong>.</div>
</li>
<div id="bib_yan-etal-2021-unified-generative" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{yan-etal-2021-unified-generative,
  author = {Yan, Hang and Gui, Tao and Dai, Junqi and Guo, Qipeng and Zhang, Zheng and Qiu, Xipeng},
  title = {A Unified Generative Framework for Various NER Subtasks},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  year = {2021},
  pages = {5808--5822},
  url = {https://aclanthology.org/2021.acl-long.451}
}
</pre>
</div>

<div id="abs_yan-etal-2021-unified-generative" class="abstract noshow">
	<b>Abstract</b>: Named Entity Recognition (NER) is the task of identifying spans that represent entities in sentences. Whether the entity spans are nested or discontinuous, the NER task can be categorized into the flat NER, nested NER, and discontinuous NER subtasks. These subtasks have been mainly solved by the token-level sequence labelling or span-level classification. However, these solutions can hardly tackle the three kinds of NER subtasks concurrently. To that end, we propose to formulate the NER subtasks as an entity span sequence generation task, which can be solved by a unified sequence-to-sequence (Seq2Seq) framework. Based on our unified framework, we can leverage the pre-trained Seq2Seq model to solve all three kinds of NER subtasks without the special design of the tagging schema or ways to enumerate spans. We exploit three types of entity representations to linearize entities into a sequence. Our proposed framework is easy-to-implement and achieves state-of-the-art (SoTA) or near SoTA performance on eight English NER datasets, including two flat NER datasets, three nested NER datasets, and three discontinuous NER datasets.
</div>
</ol>

<h4>Efficient NLP</h4>

<ol>
<li id="guo2019star" class="entry">
	<strong><font color="#0071BF">Star-Transformer</font></strong>,
	<strong><font color="#ff6666"> NAACL</font></strong>, 2019.
    <a href="javascript:toggleInfo('guo2019star','bibtex')">[BibTeX]</a><a href="https://arxiv.org/pdf/1902.09113.pdf">[PDF]</a><a href="javascript:toggleInfo('guo2019star','abstract')">[Abstract]</a>
<div class="author">Qipeng Guo, <strong>Xipeng Qiu</strong>, Pengfei Liu, Yunfan Shao, Xiangyang Xue, Zheng Zhang.</div>
</li>
<div id="bib_guo2019star" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{guo2019star,
  author = {Guo, Qipeng and Qiu, Xipeng and Liu, Pengfei and Shao, Yunfan and Xue, Xiangyang and Zhang, Zheng},
  title = {Star-Transformer},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year = {2019},
  pages = {1315--1325},
  url = {https://www.aclweb.org/anthology/N19-1133}
}
</pre>
</div>

<div id="abs_guo2019star" class="abstract noshow">
	<b>Abstract</b>: Although Transformer has achieved great successes on many NLP tasks, its heavy structure with fully-connected attention connections leads to dependencies on large training data. In this paper, we present Star-Transformer, a lightweight alternative by careful sparsification. To reduce model complexity, we replace the fully-connected structure with a star-shaped topology, in which every two non-adjacent nodes are connected through a shared relay node. Thus, complexity is reduced from quadratic to linear, while preserving the capacity to capture both local composition and long-range dependency. The experiments on four tasks (22 datasets) show that Star-Transformer achieved significant improvements against the standard Transformer for the modestly sized datasets.
</div>
<li id="guo2019low" class="entry">
	<strong><font color="#0071BF">Low-rank and Locality Constrained Self-Attention for Sequence Modeling</font></strong>,
	 IEEE/ACM Transactions on Audio, Speech, and Language Processing <strong><font color="#ff6666">(TASLP)</font></strong>, December, 2019 , Vol. 27(12), pp. 2213 - 2222, 2019.
    <a href="javascript:toggleInfo('guo2019low','bibtex')">[BibTeX]</a><a href="https://doi.org/10.1109/TASLP.2019.2944078">[DOI]</a>


    <div class="author">Qipeng Guo, <strong>Xipeng Qiu</strong>, Xiangyang Xue, Zheng Zhang.</div>

</li>
<div id="bib_guo2019low" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@article{guo2019low,
  author = {Guo, Qipeng and Qiu, Xipeng and Xue, Xiangyang and Zhang, Zheng},
  title = {Low-rank and Locality Constrained Self-Attention for Sequence Modeling},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  year = {2019},
  volume = {27},
  number = {12},
  pages = {2213--2222},
  doi = {https://doi.org/10.1109/TASLP.2019.2944078}
}
</pre>
</div>
<li id="li-etal-2021-accelerating-efficient" class="entry">
	<strong><font color="#0071BF">Accelerating BERT Inference for Sequence Labeling via Early-Exit</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2021.
    <a href="javascript:toggleInfo('li-etal-2021-accelerating-efficient','bibtex')">[BibTeX]</a><a href="https://aclanthology.org/2021.acl-long.16.pdf">[PDF]</a><a href="javascript:toggleInfo('li-etal-2021-accelerating-efficient','abstract')">[Abstract]</a>
<div class="author">Xiaonan Li, Yunfan Shao, Tianxiang Sun, Hang Yan, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_li-etal-2021-accelerating-efficient" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{li-etal-2021-accelerating,
  author = {Li, Xiaonan and Shao, Yunfan and Sun, Tianxiang and Yan, Hang and Qiu, Xipeng and Huang, Xuanjing},
  title = {Accelerating BERT Inference for Sequence Labeling via Early-Exit},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  year = {2021},
  pages = {189--199},
  url = {https://aclanthology.org/2021.acl-long.16}
}
</pre>
</div>

<div id="abs_li-etal-2021-accelerating-efficient" class="abstract noshow">
	<b>Abstract</b>: Both performance and efficiency are crucial factors for sequence labeling tasks in many real-world scenarios. Although the pre-trained models (PTMs) have significantly improved the performance of various sequence labeling tasks, their computational cost is expensive. To alleviate this problem, we extend the recent successful early-exit mechanism to accelerate the inference of PTMs for sequence labeling tasks. However, existing early-exit mechanisms are specifically designed for sequence-level tasks, rather than sequence labeling. In this paper, we first propose a simple extension of sentence-level early-exit for sequence labeling tasks. To further reduce the computational cost, we also propose a token-level early-exit mechanism that allows partial tokens to exit early at different layers. Considering the local dependency inherent in sequence labeling, we employed a window-based criterion to decide for a token whether or not to exit. The token-level early-exit brings the gap between training and inference, so we introduce an extra self-sampling fine-tuning stage to alleviate it. The extensive experiments on three popular sequence labeling tasks show that our approach can save up to 66%∼75% inference cost with minimal performance degradation. Compared with competitive compressed models such as DistilBERT, our approach can achieve better performance under the same speed-up ratios of 2×, 3×, and 4×.
</div>

<li id="liu2021towards" class="entry">
	<strong><font color="#0071BF">Towards Efficient NLP: A Standard Evaluation and A Strong Baseline</font></strong>,
	 arXiv preprint arXiv:2110.07038 , 2021.
    <a href="javascript:toggleInfo('liu2021towards','bibtex')">[BibTeX]</a>


    <div class="author">Xiangyang Liu, Tianxiang Sun, Junliang He, Lingling Wu, Xinyu Zhang, Hao Jiang, Zhao Cao, Xuanjing Huang, <strong>Xipeng Qiu</strong>.</div>

</li>
<div id="bib_liu2021towards" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@article{liu2021towards,
  author = {Liu, Xiangyang and Sun, Tianxiang and He, Junliang and Wu, Lingling and Zhang, Xinyu and Jiang, Hao and Cao, Zhao and Huang, Xuanjing and Qiu, Xipeng},
  title = {Towards Efficient NLP: A Standard Evaluation and A Strong Baseline},
  journal = {arXiv preprint arXiv:2110.07038},
  year = {2021}
}
</pre>
</div>

</ol>

<h4>Adapting PTMs to Downstream NLP Tasks</h4>

<ol>
<li id="dai-etal-2021-syntax" class="entry">
	<strong><font color="#0071BF">Does syntax matter? A strong baseline for Aspect-based Sentiment Analysis with RoBERTa</font></strong>,
	<strong><font color="#ff6666"> NAACL</font></strong>, 2021.
    <a href="javascript:toggleInfo('dai-etal-2021-syntax','bibtex')">[BibTeX]</a><a href="https://www.aclweb.org/anthology/2021.naacl-main.146.pdf">[PDF]</a><a href="javascript:toggleInfo('dai-etal-2021-syntax','abstract')">[Abstract]</a>
<div class="author">Junqi Dai, Hang Yan, Tianxiang Sun, Pengfei Liu, <strong>Xipeng Qiu</strong>.</div>
</li>
<div id="bib_dai-etal-2021-syntax" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{dai-etal-2021-syntax,
  author = {Dai, Junqi and Yan, Hang and Sun, Tianxiang and Liu, Pengfei and Qiu, Xipeng},
  title = {Does syntax matter? A strong baseline for Aspect-based Sentiment Analysis with RoBERTa},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year = {2021},
  pages = {1816--1829},
  url = {https://www.aclweb.org/anthology/2021.naacl-main.146}
}
</pre>
</div>

<div id="abs_dai-etal-2021-syntax" class="abstract noshow">
	<b>Abstract</b>: Aspect-based Sentiment Analysis (ABSA), aiming at predicting the polarities for aspects, is a fine-grained task in the field of sentiment analysis. Previous work showed syntactic information, e.g. dependency trees, can effectively improve the ABSA performance. Recently, pre-trained models (PTMs) also have shown their effectiveness on ABSA. Therefore, the question naturally arises whether PTMs contain sufficient syntactic information for ABSA so that we can obtain a good ABSA model only based on PTMs. In this paper, we firstly compare the induced trees from PTMs and the dependency parsing trees on several popular models for the ABSA task, showing that the induced tree from fine-tuned RoBERTa (FT-RoBERTa) outperforms the parser-provided tree. The further analysis experiments reveal that the FT-RoBERTa Induced Tree is more sentiment-word-oriented and could benefit the ABSA task. The experiments also show that the pure RoBERTa-based model can outperform or approximate to the previous SOTA performances on six datasets across four languages since it implicitly incorporates the task-oriented syntactic information.
</div>
<li id="sun2019utilizing" class="entry">
	<strong><font color="#0071BF">Utilizing BERT for Aspect-Based Sentiment Analysis via Constructing Auxiliary Sentence</font></strong>,
	<strong><font color="#ff6666"> NAACL</font></strong>, 2019.
    <a href="javascript:toggleInfo('sun2019utilizing','bibtex')">[BibTeX]</a><a href="https://github.com/HSLCY/ABSA-BERT-pair">[Code]</a><a href="javascript:toggleInfo('sun2019utilizing','abstract')">[Abstract]</a>
<div class="author">Chi Sun, Luyao Huang, <strong>Xipeng Qiu</strong>.</div>
</li>
<div id="bib_sun2019utilizing" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{sun2019utilizing,
  author = {Sun, Chi and Huang, Luyao and Qiu, Xipeng},
  title = {Utilizing BERT for Aspect-Based Sentiment Analysis via Constructing Auxiliary Sentence},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  year = {2019},
  pages = {380--385},
  url = {https://arxiv.org/pdf/1903.09588.pdf}
}
</pre>
</div>

<div id="abs_sun2019utilizing" class="abstract noshow">
	<b>Abstract</b>: Aspect-based sentiment analysis (ABSA), which aims to identify fine-grained opinion polarity towards a specific aspect, is a challenging subtask of sentiment analysis (SA). In this paper, we construct an auxiliary sentence from the aspect and convert ABSA to a sentence-pair classification task, such as question answering (QA) and natural language inference (NLI). We fine-tune the pre-trained model from BERT and achieve new state-of-the-art results on SentiHood and SemEval-2014 Task 4 datasets. The source codes are available at https://github.com/HSLCY/ABSA-BERT-pair.
</div>
<li id="sun2019finetune" class="entry">
	<strong><font color="#0071BF">How to Fine-Tune BERT for Text Classification?</font></strong>,
	<strong><font color="#ff6666"> CCL (Best Paper Award)</font></strong>, 2019.
    <a href="javascript:toggleInfo('sun2019finetune','bibtex')">[BibTeX]</a><a href="https://arxiv.org/pdf/1905.05583.pdf">[PDF]</a>
<div class="author">Chi Sun, <strong>Xipeng Qiu</strong>, Yige Xu, Xuanjing Huang.</div>
</li>
<div id="bib_sun2019finetune" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{sun2019finetune,
  author = {Chi Sun and Xipeng Qiu and Yige Xu and Xuanjing Huang},
  title = {How to Fine-Tune BERT for Text Classification?},
  booktitle = {Proceedings of China National Conference on Computational Linguistics},
  year = {2019},
  pages = {194--206},
  url = {https://arxiv.org/abs/1905.05583}
}
</pre>
</div>


</ol>

<h4>Chinese NLP</h4>

<ol>
<li id="chen2015gated" class="entry">
	<strong><font color="#0071BF">Gated Recursive Neural Network For Chinese Word Segmentation</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2015.
    <a href="javascript:toggleInfo('chen2015gated','bibtex')">[BibTeX]</a>
<div class="author">Xinchi Chen, <strong>Xipeng Qiu</strong>, Chenxi Zhu, Xuanjing Huang.</div>
</li>
<div id="bib_chen2015gated" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{chen2015gated,
  author = {Xinchi Chen and Xipeng Qiu and Chenxi Zhu and Xuanjing Huang},
  title = {Gated Recursive Neural Network For Chinese Word Segmentation},
  booktitle = {Proceedings of Annual Meeting of the Association for Computational Linguistics},
  year = {2015},
  pages = {1744--1753},
  url = {http://www.aclweb.org/anthology/P/P15/P15-1168.pdf}
}
</pre>
</div>

<li id="chen2015long" class="entry">
	<strong><font color="#0071BF">Long Short-Term Memory Neural Networks for Chinese Word Segmentation</font></strong>,
	<strong><font color="#ff6666"> EMNLP</font></strong>, 2015.
    <a href="javascript:toggleInfo('chen2015long','bibtex')">[BibTeX]</a>
<div class="author">Xinchi Chen, <strong>Xipeng Qiu</strong>, Chenxi Zhu, Pengfei Liu, Xuanjing Huang.</div>
</li>
<div id="bib_chen2015long" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{chen2015long,
  author = {Xinchi Chen and Xipeng Qiu and Chenxi Zhu and Pengfei Liu and Xuanjing Huang},
  title = {Long Short-Term Memory Neural Networks for Chinese Word Segmentation},
  booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing},
  year = {2015},
  pages = {1197--1206},
  url = {http://www.aclweb.org/anthology/D/D15/D15-1141.pdf}
}
</pre>
</div>

<li id="yan2020graph" class="entry">
	<strong><font color="#0071BF">A Graph-based Model for Joint Chinese Word Segmentation and Dependency Parsing</font></strong>,
	 Transactions of the Association for Computational Linguistics <strong><font color="#ff6666">(TACL)</font></strong> , Vol. 8, pp. 78-92, 2020.
    <a href="javascript:toggleInfo('yan2020graph','bibtex')">[BibTeX]</a><a href="https://doi.org/10.1162/tacl_a_00301">[DOI]</a><a href="https://arxiv.org/pdf/1904.04697">[PDF]</a>


    <div class="author">Hang Yan, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>

</li>
<div id="bib_yan2020graph" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@article{yan2020graph,
  author = {Yan, Hang and Qiu, Xipeng and Huang, Xuanjing},
  title = {A Graph-based Model for Joint Chinese Word Segmentation and Dependency Parsing},
  journal = {Transactions of the Association for Computational Linguistics},
  year = {2020},
  volume = {8},
  pages = {78--92},
  doi = {10.1162/tacl_a_00301}
}
</pre>
</div>


<li id="qian2016new" class="entry">
	<strong><font color="#0071BF">A New Psychometric-inspired Evaluation Metric for Chinese Word Segmentation</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2016.
    <a href="javascript:toggleInfo('qian2016new','bibtex')">[BibTeX]</a>
<div class="author">Peng Qian, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_qian2016new" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{qian2016new,
  author = {Peng Qian and Xipeng Qiu and Xuanjing Huang},
  title = {A New Psychometric-inspired Evaluation Metric for Chinese Word Segmentation},
  booktitle = {Proceedings of Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
  pages = {2185--2194},
  url = {http://aclweb.org/anthology/P/P16/P16-1206.pdf}
}
</pre>
</div>

<li id="chen2017adversarial" class="entry">
	<strong><font color="#0071BF">Adversarial Multi-Criteria Learning for Chinese Word Segmentation</font></strong>,
	<strong><font color="#ff6666"> ACL (Outstanding Paper Award)</font></strong>, 2017.
    <a href="javascript:toggleInfo('chen2017adversarial','bibtex')">[BibTeX]</a>
<div class="author">Xinchi Chen, Zhan Shi, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_chen2017adversarial" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{chen2017adversarial,
  author = {Xinchi Chen and Zhan Shi and Xipeng Qiu and Xuanjing Huang},
  title = {Adversarial Multi-Criteria Learning for Chinese Word Segmentation},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics},
  year = {2017},
  pages = {1193--1203},
  url = {http://aclweb.org/anthology/P/P17/P17-1110.pdf}
}
</pre>
</div>

<li id="sun2019vcwe" class="entry">
	<strong><font color="#0071BF">VCWE: Visual Character-Enhanced Word Embeddings</font></strong>,
	<strong><font color="#ff6666"> NAACL</font></strong>, 2019.
    <a href="javascript:toggleInfo('sun2019vcwe','bibtex')">[BibTeX]</a><a href="https://www.aclweb.org/anthology/N19-1277.pdf">[PDF]</a><a href="javascript:toggleInfo('sun2019vcwe','abstract')">[Abstract]</a>
<div class="author">Chi Sun, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_sun2019vcwe" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{sun2019vcwe,
  author = {Sun, Chi and Qiu, Xipeng and Huang, Xuanjing},
  title = {VCWE: Visual Character-Enhanced Word Embeddings},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  year = {2019},
  pages = {2710--2719},
  url = {https://www.aclweb.org/anthology/N19-1277}
}
</pre>
</div>

<div id="abs_sun2019vcwe" class="abstract noshow">
	<b>Abstract</b>: Chinese is a logographic writing system, and the shape of Chinese characters contain rich syntactic and semantic information. In this paper, we propose a model to learn Chinese word embeddings via three-level composition: (1) a convolutional neural network to extract the intra-character compositionality from the visual shape of a character; (2) a recurrent neural network with self-attention to compose character representation into word embeddings; (3) the Skip-Gram framework to capture non-compositionality directly from the contextual information. Evaluations demonstrate the superior performance of our model on four tasks: word similarity, sentiment analysis, named entity recognition and part-of-speech tagging.
</div>
<li id="li-etal-2020-flat" class="entry">
	<strong><font color="#0071BF">FLAT: Chinese NER Using Flat-Lattice Transformer</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2020.
    <a href="javascript:toggleInfo('li-etal-2020-flat','bibtex')">[BibTeX]</a><a href="https://arxiv.org/pdf/2004.11795.pdf">[PDF]</a><a href="https://github.com/LeeSureman/Flat-Lattice-Transformer">[Code]</a><a href="javascript:toggleInfo('li-etal-2020-flat','abstract')">[Abstract]</a>
<div class="author">Xiaonan Li, Hang Yan, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_li-etal-2020-flat" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{li-etal-2020-flat,
  author = {Li, Xiaonan and Yan, Hang and Qiu, Xipeng and Huang, Xuanjing},
  title = {FLAT: Chinese NER Using Flat-Lattice Transformer},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
  pages = {6836--6842},
  url = {https://www.aclweb.org/anthology/2020.acl-main.611}
}
</pre>
</div>

<div id="abs_li-etal-2020-flat" class="abstract noshow">
	<b>Abstract</b>: Recently, the character-word lattice structure has been proved to be effective for Chinese named entity recognition (NER) by incorporating the word information. However, since the lattice structure is complex and dynamic, the lattice-based models are hard to fully utilize the parallel computation of GPUs and usually have a low inference speed. In this paper, we propose FLAT: Flat-LAttice Transformer for Chinese NER, which converts the lattice structure into a flat structure consisting of spans. Each span corresponds to a character or latent word and its position in the original lattice. With the power of Transformer and well-designed position encoding, FLAT can fully leverage the lattice information and has an excellent parallel ability. Experiments on four datasets show FLAT outperforms other lexicon-based models in performance and efficiency.
</div>
<li id="qiu-etal-2020-concise" class="entry">
	<strong><font color="#0071BF">A Concise Model for Multi-Criteria Chinese Word Segmentation with Transformer Encoder</font></strong>,
	<strong><font color="#ff6666"> EMNLP Findings</font></strong>, 2020.
    <a href="javascript:toggleInfo('qiu-etal-2020-concise','bibtex')">[BibTeX]</a><a href="https://arxiv.org/pdf/1906.12035.pdf">[PDF]</a><a href="javascript:toggleInfo('qiu-etal-2020-concise','abstract')">[Abstract]</a>
<div class="author"><strong>Xipeng Qiu</strong>, Hengzhi Pei, Hang Yan, Xuanjing Huang.</div>
</li>
<div id="bib_qiu-etal-2020-concise" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{qiu-etal-2020-concise,
  author = {Qiu, Xipeng and Pei, Hengzhi and Yan, Hang and Huang, Xuanjing},
  title = {A Concise Model for Multi-Criteria Chinese Word Segmentation with Transformer Encoder},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  year = {2020},
  pages = {2887--2897},
  url = {https://www.aclweb.org/anthology/2020.findings-emnlp.260}
}
</pre>
</div>

<div id="abs_qiu-etal-2020-concise" class="abstract noshow">
	<b>Abstract</b>: Multi-criteria Chinese word segmentation (MCCWS) aims to exploit the relations among the multiple heterogeneous segmentation criteria and further improve the performance of each single criterion. Previous work usually regards MCCWS as different tasks, which are learned together under the multi-task learning framework. In this paper, we propose a concise but effective unified model for MCCWS, which is fully-shared for all the criteria. By leveraging the powerful ability of the Transformer encoder, the proposed unified model can segment Chinese text according to a unique criterion-token indicating the output criterion. Besides, the proposed unified model can segment both simplified and traditional Chinese and has an excellent transfer capability. Experiments on eight datasets with different criteria show that our model outperforms our single-criterion baseline model and other multi-criteria models. Source codes of this paper are available on Github.
</div>
<li id="geng-etal-2021-fasthan" class="entry">
	<strong><font color="#0071BF">fastHan: A BERT-based Multi-Task Toolkit for Chinese NLP</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2021.
    <a href="javascript:toggleInfo('geng-etal-2021-fasthan','bibtex')">[BibTeX]</a><a href="https://aclanthology.org/2021.acl-demo.12.pdf">[PDF]</a><a href="javascript:toggleInfo('geng-etal-2021-fasthan','abstract')">[Abstract]</a>
<div class="author">Zhichao Geng, Hang Yan, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_geng-etal-2021-fasthan" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{geng-etal-2021-fasthan,
  author = {Geng, Zhichao and Yan, Hang and Qiu, Xipeng and Huang, Xuanjing},
  title = {fastHan: A BERT-based Multi-Task Toolkit for Chinese NLP},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations},
  year = {2021},
  pages = {99--106},
  url = {https://aclanthology.org/2021.acl-demo.12}
}
</pre>
</div>

<div id="abs_geng-etal-2021-fasthan" class="abstract noshow">
	<b>Abstract</b>: We present fastHan, an open-source toolkit for four basic tasks in Chinese natural language processing: Chinese word segmentation (CWS), Part-of-Speech (POS) tagging, named entity recognition (NER), and dependency parsing. The backbone of fastHan is a multi-task model based on a pruned BERT, which uses the first 8 layers in BERT. We also provide a 4-layer base model compressed from the 8-layer model. The joint-model is trained and evaluated on 13 corpora of four tasks, yielding near state-of-the-art (SOTA) performance in dependency parsing and NER, achieving SOTA performance in CWS and POS. Besides, fastHan's transferability is also strong, performing much better than popular segmentation tools on a non-training corpus. To better meet the need of practical application, we allow users to use their own labeled data to further fine-tune fastHan. In addition to its small size and excellent performance, fastHan is user-friendly. Implemented as a python package, fastHan isolates users from the internal technical details and is convenient to use. The project is released on Github.
</div>
</ol>


<h4>Reliable NLP</h4>

<ol>
<li id="li-etal-2020-bert-attack" class="entry">
	<strong><font color="#0071BF">BERT-ATTACK: Adversarial Attack Against BERT Using BERT</font></strong>,
	<strong><font color="#ff6666"> EMNLP</font></strong>, 2020.
    <a href="javascript:toggleInfo('li-etal-2020-bert-attack','bibtex')">[BibTeX]</a><a href="https://arxiv.org/pdf/2004.09984.pdf">[PDF]</a><a href="javascript:toggleInfo('li-etal-2020-bert-attack','abstract')">[Abstract]</a>
<div class="author">Linyang Li, Ruotian Ma, Qipeng Guo, Xiangyang Xue, <strong>Xipeng Qiu</strong>.</div>
</li>
<div id="bib_li-etal-2020-bert-attack" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{li-etal-2020-bert-attack,
  author = {Li, Linyang and Ma, Ruotian and Guo, Qipeng and Xue, Xiangyang and Qiu, Xipeng},
  title = {BERT-ATTACK: Adversarial Attack Against BERT Using BERT},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing},
  year = {2020},
  pages = {6193--6202},
  url = {https://www.aclweb.org/anthology/2020.emnlp-main.500}
}
</pre>
</div>

<div id="abs_li-etal-2020-bert-attack" class="abstract noshow">
	<b>Abstract</b>: Adversarial attacks for discrete data (such as texts) have been proved significantly more challenging than continuous data (such as images) since it is difficult to generate adversarial samples with gradient-based methods. Current successful attack methods for texts usually adopt heuristic replacement strategies on the character or word level, which remains challenging to find the optimal solution in the massive space of possible combinations of replacements while preserving semantic consistency and language fluency. In this paper, we propose <b>BERT-Attack</b>, a high-quality and effective method to generate adversarial samples using pre-trained masked language models exemplified by BERT. We turn BERT against its fine-tuned models and other deep neural models in downstream tasks so that we can successfully mislead the target models to predict incorrectly. Our method outperforms state-of-the-art attack strategies in both success rate and perturb percentage, while the generated adversarial samples are fluent and semantically preserved. Also, the cost of calculation is low, thus possible for large-scale generations. The code is available at https://github.com/LinyangLee/BERT-Attack.
</div>
<li id="Li_Qiu_2021" class="entry">
	<strong><font color="#0071BF">Token-Aware Virtual Adversarial Training in Natural Language Understanding</font></strong>,
	<strong><font color="#ff6666"> AAAI</font></strong>, 2021.
    <a href="javascript:toggleInfo('Li_Qiu_2021','bibtex')">[BibTeX]</a><a href="https://arxiv.org/pdf/2004.14543.pdf">[PDF]</a>
<div class="author">Linyang Li, <strong>Xipeng Qiu</strong>.</div>
</li>
<div id="bib_Li_Qiu_2021" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{Li_Qiu_2021,
  author = {Li, Linyang and Qiu, Xipeng},
  title = {Token-Aware Virtual Adversarial Training in Natural Language Understanding},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year = {2021},
  volume = {35},
  number = {9},
  pages = {8410--8418},
  url = {https://ojs.aaai.org/index.php/AAAI/article/view/17022}
}
</pre>
</div>

<li id="li-etal-2021-backdoor" class="entry">
	<strong><font color="#0071BF">Backdoor Attacks on Pre-trained Models by Layerwise Weight Poisoning</font></strong>,
	<strong><font color="#ff6666"> EMNLP</font></strong>, 2021.
    <a href="javascript:toggleInfo('li-etal-2021-backdoor','bibtex')">[BibTeX]</a><a href="https://aclanthology.org/2021.emnlp-main.241.pdf">[PDF]</a><a href="javascript:toggleInfo('li-etal-2021-backdoor','abstract')">[Abstract]</a>
<div class="author">Linyang Li, Demin Song, Xiaonan Li, Jiehang Zeng, Ruotian Ma, <strong>Xipeng Qiu</strong>.</div>
</li>
<div id="bib_li-etal-2021-backdoor" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{li-etal-2021-backdoor,
  author = {Li, Linyang and Song, Demin and Li, Xiaonan and Zeng, Jiehang and Ma, Ruotian and Qiu, Xipeng},
  title = {Backdoor Attacks on Pre-trained Models by Layerwise Weight Poisoning},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  year = {2021},
  pages = {3023--3032},
  url = {https://aclanthology.org/2021.emnlp-main.241}
}
</pre>
</div>

<div id="abs_li-etal-2021-backdoor" class="abstract noshow">
	<b>Abstract</b>: <b>P</b>re-<b>T</b>rained <b>M</b>odel<b>s</b> have been widely applied and recently proved vulnerable under backdoor attacks: the released pre-trained weights can be maliciously poisoned with certain triggers. When the triggers are activated, even the fine-tuned model will predict pre-defined labels, causing a security threat. These backdoors generated by the poisoning methods can be erased by changing hyper-parameters during fine-tuning or detected by finding the triggers. In this paper, we propose a stronger weight-poisoning attack method that introduces a layerwise weight poisoning strategy to plant deeper backdoors; we also introduce a combinatorial trigger that cannot be easily detected. The experiments on text classification tasks show that previous defense methods cannot resist our weight-poisoning method, which indicates that our method can be widely applied and may provide hints for future model robustness studies.
</div>
<li id="wang-etal-2021-textflint" class="entry">
	<strong><font color="#0071BF">TextFlint: Unified Multilingual Robustness Evaluation Toolkit for Natural Language Processing</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2021.
    <a href="javascript:toggleInfo('wang-etal-2021-textflint','bibtex')">[BibTeX]</a><a href="javascript:toggleInfo('wang-etal-2021-textflint','abstract')">[Abstract]</a>
<div class="author">Xiao Wang, Qin Liu, Tao Gui, Qi Zhang, Yicheng Zou, Xin Zhou, Jiacheng Ye, Yongxin Zhang, Rui Zheng, Zexiong Pang, Qinzhuo Wu, Zhengyan Li, Chong Zhang, Ruotian Ma, Zichu Fei, Ruijian Cai, Jun Zhao, Xingwu Hu, Zhiheng Yan, Yiding Tan, Yuan Hu, Qiyuan Bian, Zhihua Liu, Shan Qin, Bolin Zhu, Xiaoyu Xing, Jinlan Fu, Yue Zhang, Minlong Peng, Xiaoqing Zheng, Yaqian Zhou, Zhongyu Wei, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_wang-etal-2021-textflint" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{wang-etal-2021-textflint,
  author = {Wang, Xiao and Liu, Qin and Gui, Tao and Zhang, Qi and Zou, Yicheng and Zhou, Xin and Ye, Jiacheng and Zhang, Yongxin and Zheng, Rui and Pang, Zexiong and Wu, Qinzhuo and Li, Zhengyan and Zhang, Chong and Ma, Ruotian and Fei, Zichu and Cai, Ruijian and Zhao, Jun and Hu, Xingwu and Yan, Zhiheng and Tan, Yiding and Hu, Yuan and Bian, Qiyuan and Liu, Zhihua and Qin, Shan and Zhu, Bolin and Xing, Xiaoyu and Fu, Jinlan and Zhang, Yue and Peng, Minlong and Zheng, Xiaoqing and Zhou, Yaqian and Wei, Zhongyu and Qiu, Xipeng and Huang, Xuanjing},
  title = {TextFlint: Unified Multilingual Robustness Evaluation Toolkit for Natural Language Processing},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations},
  year = {2021},
  pages = {347--355},
  url = {https://aclanthology.org/2021.acl-demo.41}
}
</pre>
</div>

<div id="abs_wang-etal-2021-textflint" class="abstract noshow">
	<b>Abstract</b>: TextFlint is a multilingual robustness evaluation toolkit for NLP tasks that incorporates universal text transformation, task-specific transformation, adversarial attack, subpopulation, and their combinations to provide comprehensive robustness analyses. This enables practitioners to automatically evaluate their models from various aspects or to customize their evaluations as desired with just a few lines of code. TextFlint also generates complete analytical reports as well as targeted augmented data to address the shortcomings of the model in terms of its robustness. To guarantee acceptability, all the text transformations are linguistically based and all the transformed data selected (up to 100,000 texts) scored highly under human evaluation. To validate the utility, we performed large-scale empirical evaluations (over 67,000) on state-of-the-art deep learning models, classic supervised methods, and real-world systems. The toolkit is already available at https://github.com/textflint with all the evaluation results demonstrated at textflint.io.
</div>


</ol>

<h4>Text Matching</h4>

<ol>
<li id="liu2016modelling" class="entry">
	<strong><font color="#0071BF">Modelling Interaction of Sentence Pair with Coupled-LSTMs</font></strong>,
	<strong><font color="#ff6666"> EMNLP</font></strong>, 2016.
    <a href="javascript:toggleInfo('liu2016modelling','bibtex')">[BibTeX]</a>
<div class="author">Pengfei Liu, <strong>Xipeng Qiu</strong>, Yaqian Zhou, Jifan Chen, Xuanjing Huang.</div>
</li>
<div id="bib_liu2016modelling" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{liu2016modelling,
  author = {Liu, Pengfei and Qiu, Xipeng and Zhou, Yaqian and Chen, Jifan and Huang, Xuanjing},
  title = {Modelling Interaction of Sentence Pair with Coupled-LSTMs},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
  year = {2016},
  pages = {1703--1712},
  url = {https://aclweb.org/anthology/D16-1176}
}
</pre>
</div>

<li id="qiu2015convolutional" class="entry">
	<strong><font color="#0071BF">Convolutional Neural Tensor Network Architecture for Community-based Question Answering</font></strong>,
	<strong><font color="#ff6666"> IJCAI</font></strong>, 2015.
    <a href="javascript:toggleInfo('qiu2015convolutional','bibtex')">[BibTeX]</a>
<div class="author"><strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_qiu2015convolutional" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{qiu2015convolutional,
  author = {Xipeng Qiu and Xuanjing Huang},
  title = {Convolutional Neural Tensor Network Architecture for Community-based Question Answering},
  booktitle = {Proceedings of International Joint Conference on Artificial Intelligence},
  year = {2015},
  url = {http://ijcai.org/papers15/Papers/IJCAI15-188.pdf}
}
</pre>
</div>

<li id="gong2018convolutional" class="entry">
	<strong><font color="#0071BF">Convolutional Interaction Network for Natural Language Inference</font></strong>,
	<strong><font color="#ff6666"> EMNLP</font></strong>, 2018.
    <a href="javascript:toggleInfo('gong2018convolutional','bibtex')">[BibTeX]</a>
<div class="author">Jingjing Gong, <strong>Xipeng Qiu</strong>, Xinchi Chen, Dong Liang, Xuanjing Huang.</div>
</li>
<div id="bib_gong2018convolutional" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{gong2018convolutional,
  author = {Gong, Jingjing and Qiu, Xipeng and Chen, Xinchi and Liang, Dong and Huang, Xuanjing},
  title = {Convolutional Interaction Network for Natural Language Inference},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  year = {2018},
  pages = {1576--1585},
  url = {https://www.aclweb.org/anthology/D18-1186}
}
</pre>
</div>

<li id="liu2016deep" class="entry">
	<strong><font color="#0071BF">Deep Fusion LSTMs for Text Semantic Matching</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2016.
    <a href="javascript:toggleInfo('liu2016deep','bibtex')">[BibTeX]</a><a href="http://aclweb.org/anthology/P/P16/P16-1098.pdf">[PDF]</a>
<div class="author">Pengfei Liu, <strong>Xipeng Qiu</strong>, Jifan Chen, Xuanjing Huang.</div>
</li>
<div id="bib_liu2016deep" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{liu2016deep,
  author = {Pengfei Liu and Xipeng Qiu and Jifan Chen and Xuanjing Huang},
  title = {Deep Fusion LSTMs for Text Semantic Matching},
  booktitle = {Proceedings of Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
  pages = {1034--1043},
  url = {http://aclweb.org/anthology/P/P16/P16-1098.pdf}
}
</pre>
</div>

<li id="zhong-etal-2020-extractive" class="entry">
	<strong><font color="#0071BF">Extractive Summarization as Text Matching</font></strong>,
	<strong><font color="#ff6666"> ACL</font></strong>, 2020.
    <a href="javascript:toggleInfo('zhong-etal-2020-extractive','bibtex')">[BibTeX]</a><a href="https://arxiv.org/pdf/2004.08795">[PDF]</a><a href="https://github.com/maszhongming/MatchSum">[Code]</a><a href="javascript:toggleInfo('zhong-etal-2020-extractive','abstract')">[Abstract]</a>
<div class="author">Ming Zhong, Pengfei Liu, Yiran Chen, Danqing Wang, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_zhong-etal-2020-extractive" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{zhong-etal-2020-extractive,
  author = {Zhong, Ming and Liu, Pengfei and Chen, Yiran and Wang, Danqing and Qiu, Xipeng and Huang, Xuanjing},
  title = {Extractive Summarization as Text Matching},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
  pages = {6197--6208},
  url = {https://www.aclweb.org/anthology/2020.acl-main.552}
}
</pre>
</div>

<div id="abs_zhong-etal-2020-extractive" class="abstract noshow">
	<b>Abstract</b>: This paper creates a paradigm shift with regard to the way we build neural extractive summarization systems. Instead of following the commonly used framework of extracting sentences individually and modeling the relationship between sentences, we formulate the extractive summarization task as a semantic text matching problem, in which a source document and candidate summaries will be (extracted from the original text) matched in a semantic space. Notably, this paradigm shift to semantic matching framework is well-grounded in our comprehensive analysis of the inherent gap between sentence-level and summary-level extractors based on the property of the dataset. Besides, even instantiating the framework with a simple form of a matching model, we have driven the state-of-the-art extractive result on CNN/DailyMail to a new level (44.41 in ROUGE-1). Experiments on the other five datasets also show the effectiveness of the matching framework. We believe the power of this matching-based summarization framework has not been fully exploited. To encourage more instantiations in the future, we have released our codes, processed dataset, as well as generated summaries in https://github.com/maszhongming/MatchSum.
</div>
</ol>

<h4>Multi-Task Learning for NLP</h4>


<ol>


<li id="liu2016deep-multitask" class="entry">
	<strong><font color="#0071BF">Deep Multi-Task Learning with Shared Memory</font></strong>,
	<strong><font color="#ff6666"> EMNLP</font></strong>, 2016.
    <a href="javascript:toggleInfo('liu2016deep-multitask','bibtex')">[BibTeX]</a>
<div class="author">Pengfei Liu, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_liu2016deep-multitask" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{liu2016deep-multitask,
  author = {Liu, Pengfei and Qiu, Xipeng and Huang, Xuanjing},
  title = {Deep Multi-Task Learning with Shared Memory},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
  year = {2016},
  pages = {118--127},
  url = {https://aclweb.org/anthology/D16-1012}
}
</pre>
</div>

<li id="liu2016recurrent" class="entry">
	<strong><font color="#0071BF">Recurrent Neural Network for Text Classification with Multi-Task Learning</font></strong>,
	<strong><font color="#ff6666"> IJCAI</font></strong>, 2016.
    <a href="javascript:toggleInfo('liu2016recurrent','bibtex')">[BibTeX]</a>
<div class="author">Pengfei Liu, <strong>Xipeng Qiu</strong>, Xuanjing Huang.</div>
</li>
<div id="bib_liu2016recurrent" class="bibtex noshow">
<b>BibTeX</b>:
<pre>
@inproceedings{liu2016recurrent,
  author = {Pengfei Liu and Xipeng Qiu and Xuanjing Huang},
  title = {Recurrent Neural Network for Text Classification with Multi-Task Learning},
  booktitle = {Proceedings of International Joint Conference on Artificial Intelligence},
  year = {2016},
  pages = {2873--2879},
  url = {https://arxiv.org/abs/1605.05101}
}
</pre>
</div>