Compare vim73/src/regexp.c vs vim74/src/regexp.c

vim73/src/regexp.c
176566 Tue, 13 Jul 2010 06:41:48 EST
vim74/src/regexp.c
198032 Fri, 02 Aug 2013 02:31:30 EST
Line 38 to 4638 * Named character class support added by Walter Briscoe (1998 Jul 01) 39 */ 40 41 #include "vim.h" 42 43 #undef DEBUG 44 45 /* 46 * The "internal use only" fields in regexp.h are present to pass info from Line 38 to 5938 * Named character class support added by Walter Briscoe (1998 Jul 01) 39 */ 40 41 /* Uncomment the first if you do not want to see debugging logs or files 42 * related to regular expressions, even when compiling with -DDEBUG. 43 * Uncomment the second to get the regexp debugging. */ 44 /* #undef DEBUG */ 45 /* #define DEBUG */ 46 47 #include "vim.h" 48 49 #ifdef DEBUG 50 /* show/save debugging data when BT engine is used */ 51 # define BT_REGEXP_DUMP 52 /* save the debugging data to a file instead of displaying it */ 53 # define BT_REGEXP_LOG 54 # define BT_REGEXP_DEBUG_LOG 55 # define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log" 56 #endif 57 58 /* 59 * The "internal use only" fields in regexp.h are present to pass info from
Line 326 to 334326 /* Used for an error (down from) vim_regcomp(): give the error message, set 327 * rc_did_emsg and return NULL */ 328 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL) 329 #define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) 330 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) 331 #define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) 332 333 #define MAX_LIMIT (32767L << 16L) 334 Line 339 to 348339 /* Used for an error (down from) vim_regcomp(): give the error message, set 340 * rc_did_emsg and return NULL */ 341 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL) 342 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) 343 #define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) 344 #define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL) 345 #define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) 346 347 #define MAX_LIMIT (32767L << 16L) 348
Line 336 to 346336 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n)); 337 static char_u *cstrchr __ARGS((char_u *, int)); 338 339 #ifdef DEBUG 340 static void regdump __ARGS((char_u *, regprog_T *)); 341 static char_u *regprop __ARGS((char_u *)); 342 #endif 343 344 #define NOT_MULTI 0 345 #define MULTI_ONE 1 346 #define MULTI_MULT 2 Line 350 to 372350 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n)); 351 static char_u *cstrchr __ARGS((char_u *, int)); 352 353 #ifdef BT_REGEXP_DUMP 354 static void regdump __ARGS((char_u *, bt_regprog_T *)); 355 #endif 356 #ifdef DEBUG 357 static char_u *regprop __ARGS((char_u *)); 358 #endif 359 360 static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); 361 static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); 362 static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); 363 static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); 364 #ifdef FEAT_SYN_HL 365 static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); 366 static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here"); 367 #endif 368 static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%["); 369 static char_u e_empty_sb[] = N_("E70: Empty %s%%[]"); 370 #define NOT_MULTI 0 371 #define MULTI_ONE 1 372 #define MULTI_MULT 2
Line 630 to 636630 }; 631 #endif 632 633 static int curchr; 634 635 /* arguments for reg() */ 636 #define REG_NOPAREN 0 /* toplevel reg() */ Line 656 to 668656 }; 657 #endif 658 659 static int curchr; /* currently parsed character */ 660 /* Previous character. Note: prevchr is sometimes -1 when we are not at the 661 * start, eg in /[ ^I]^ the pattern was never found even if it existed, 662 * because ^ was taken to be magic -- webb */ 663 static int prevchr; 664 static int prevprevchr; /* previous-previous character */ 665 static int nextchr; /* used for ungetchr() */ 666 667 /* arguments for reg() */ 668 #define REG_NOPAREN 0 /* toplevel reg() */
Line 638 to 647638 #define REG_ZPAREN 2 /* \z(\) */ 639 #define REG_NPAREN 3 /* \%(\) */ 640 641 /* 642 * Forward declarations for vim_regcomp()'s friends. 643 */ 644 static void initchr __ARGS((char_u *)); 645 static int getchr __ARGS((void)); 646 static void skipchr_keepstart __ARGS((void)); 647 static int peekchr __ARGS((void)); Line 670 to 694670 #define REG_ZPAREN 2 /* \z(\) */ 671 #define REG_NPAREN 3 /* \%(\) */ 672 673 typedef struct 674 { 675 char_u *regparse; 676 int prevchr_len; 677 int curchr; 678 int prevchr; 679 int prevprevchr; 680 int nextchr; 681 int at_start; 682 int prev_at_start; 683 int regnpar; 684 } parse_state_T; 685 686 /* 687 * Forward declarations for vim_regcomp()'s friends. 688 */ 689 static void initchr __ARGS((char_u *)); 690 static void save_parse_state __ARGS((parse_state_T *ps)); 691 static void restore_parse_state __ARGS((parse_state_T *ps)); 692 static int getchr __ARGS((void)); 693 static void skipchr_keepstart __ARGS((void)); 694 static int peekchr __ARGS((void));
Line 666 to 680666 static void regc __ARGS((int b)); 667 #ifdef FEAT_MBYTE 668 static void regmbc __ARGS((int c)); 669 #else 670 # define regmbc(c) regc(c) 671 #endif 672 static void reginsert __ARGS((int, char_u *)); 673 static void reginsert_limits __ARGS((int, long, long, char_u *)); 674 static char_u *re_put_long __ARGS((char_u *pr, long_u val)); 675 static int read_limits __ARGS((long *, long *)); 676 static void regtail __ARGS((char_u *, char_u *)); 677 static void regoptail __ARGS((char_u *, char_u *)); 678 679 /* 680 * Return TRUE if compiled regular expression "prog" can match a line break. Line 713 to 735713 static void regc __ARGS((int b)); 714 #ifdef FEAT_MBYTE 715 static void regmbc __ARGS((int c)); 716 # define REGMBC(x) regmbc(x); 717 # define CASEMBC(x) case x: 718 #else 719 # define regmbc(c) regc(c) 720 # define REGMBC(x) 721 # define CASEMBC(x) 722 #endif 723 static void reginsert __ARGS((int, char_u *)); 724 static void reginsert_nr __ARGS((int op, long val, char_u *opnd)); 725 static void reginsert_limits __ARGS((int, long, long, char_u *)); 726 static char_u *re_put_long __ARGS((char_u *pr, long_u val)); 727 static int read_limits __ARGS((long *, long *)); 728 static void regtail __ARGS((char_u *, char_u *)); 729 static void regoptail __ARGS((char_u *, char_u *)); 730 731 static regengine_T bt_regengine; 732 static regengine_T nfa_regengine; 733 734 /* 735 * Return TRUE if compiled regular expression "prog" can match a line break.
Line 758 to 763
758 /*
759 * Produce the bytes for equivalence class "c".
760 * Currently only handles latin1, latin9 and utf-8.
761 */
762 static void
763 reg_equi_class(c)
Line 813 to 819
813 /*
814 * Produce the bytes for equivalence class "c".
815 * Currently only handles latin1, latin9 and utf-8.
816 * NOTE: When changing this function, also change nfa_emit_equi_class()
817 */
818 static void
819 reg_equi_class(c)
Line 787 to 854787 switch (c) 788 { 789 case 'A': case '\300': case '\301': case '\302': 790 case '\303': case '\304': case '\305': 791 regmbc('A'); regmbc('\300'); regmbc('\301'); 792 regmbc('\302'); regmbc('\303'); regmbc('\304'); 793 regmbc('\305'); 794 return; 795 case 'C': case '\307': 796 regmbc('C'); regmbc('\307'); 797 return; 798 case 'E': case '\310': case '\311': case '\312': case '\313': 799 regmbc('E'); regmbc('\310'); regmbc('\311'); 800 regmbc('\312'); regmbc('\313'); 801 return; 802 case 'I': case '\314': case '\315': case '\316': case '\317': 803 regmbc('I'); regmbc('\314'); regmbc('\315'); 804 regmbc('\316'); regmbc('\317'); 805 return; 806 case 'N': case '\321': 807 regmbc('N'); regmbc('\321'); 808 return; 809 case 'O': case '\322': case '\323': case '\324': case '\325': 810 case '\326': 811 regmbc('O'); regmbc('\322'); regmbc('\323'); 812 regmbc('\324'); regmbc('\325'); regmbc('\326'); 813 return; 814 case 'U': case '\331': case '\332': case '\333': case '\334': 815 regmbc('U'); regmbc('\331'); regmbc('\332'); 816 regmbc('\333'); regmbc('\334'); 817 return; 818 case 'Y': case '\335': 819 regmbc('Y'); regmbc('\335'); 820 return; 821 case 'a': case '\340': case '\341': case '\342': 822 case '\343': case '\344': case '\345': 823 regmbc('a'); regmbc('\340'); regmbc('\341'); 824 regmbc('\342'); regmbc('\343'); regmbc('\344'); 825 regmbc('\345'); 826 return; 827 case 'c': case '\347': 828 regmbc('c'); regmbc('\347'); 829 return; 830 case 'e': case '\350': case '\351': case '\352': case '\353': 831 regmbc('e'); regmbc('\350'); regmbc('\351'); 832 regmbc('\352'); regmbc('\353'); 833 return; 834 case 'i': case '\354': case '\355': case '\356': case '\357': 835 regmbc('i'); regmbc('\354'); regmbc('\355'); 836 regmbc('\356'); regmbc('\357'); 837 return; 838 case 'n': case '\361': 839 regmbc('n'); regmbc('\361'); 840 return; 841 case 'o': case '\362': case '\363': case '\364': case '\365': 842 case '\366': 843 regmbc('o'); regmbc('\362'); regmbc('\363'); 
844 regmbc('\364'); regmbc('\365'); regmbc('\366'); 845 return; 846 case 'u': case '\371': case '\372': case '\373': case '\374': 847 regmbc('u'); regmbc('\371'); regmbc('\372'); 848 regmbc('\373'); regmbc('\374'); 849 return; 850 case 'y': case '\375': case '\377': 851 regmbc('y'); regmbc('\375'); regmbc('\377'); 852 return; 853 } 854 #endif Line 843 to 1137843 switch (c) 844 { 845 case 'A': case '\300': case '\301': case '\302': 846 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd) 847 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2) 848 case '\303': case '\304': case '\305': 849 regmbc('A'); regmbc('\300'); regmbc('\301'); 850 regmbc('\302'); regmbc('\303'); regmbc('\304'); 851 regmbc('\305'); 852 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104) 853 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0) 854 REGMBC(0x1ea2) 855 return; 856 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06) 857 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06) 858 return; 859 case 'C': case '\307': 860 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c) 861 regmbc('C'); regmbc('\307'); 862 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a) 863 REGMBC(0x10c) 864 return; 865 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a) 866 CASEMBC(0x1e0e) CASEMBC(0x1e10) 867 regmbc('D'); REGMBC(0x10e) REGMBC(0x110) 868 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10) 869 return; 870 case 'E': case '\310': case '\311': case '\312': case '\313': 871 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118) 872 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc) 873 regmbc('E'); regmbc('\310'); regmbc('\311'); 874 regmbc('\312'); regmbc('\313'); 875 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116) 876 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba) 877 REGMBC(0x1ebc) 878 return; 879 case 'F': CASEMBC(0x1e1e) 880 regmbc('F'); REGMBC(0x1e1e) 881 return; 882 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120) 883 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4) 884 CASEMBC(0x1e20) 885 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e) 886 
REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4) 887 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20) 888 return; 889 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22) 890 CASEMBC(0x1e26) CASEMBC(0x1e28) 891 regmbc('H'); REGMBC(0x124) REGMBC(0x126) 892 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28) 893 return; 894 case 'I': case '\314': case '\315': case '\316': case '\317': 895 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e) 896 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8) 897 regmbc('I'); regmbc('\314'); regmbc('\315'); 898 regmbc('\316'); regmbc('\317'); 899 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c) 900 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf) 901 REGMBC(0x1ec8) 902 return; 903 case 'J': CASEMBC(0x134) 904 regmbc('J'); REGMBC(0x134) 905 return; 906 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30) 907 CASEMBC(0x1e34) 908 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8) 909 REGMBC(0x1e30) REGMBC(0x1e34) 910 return; 911 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d) 912 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a) 913 regmbc('L'); REGMBC(0x139) REGMBC(0x13b) 914 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141) 915 REGMBC(0x1e3a) 916 return; 917 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40) 918 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40) 919 return; 920 case 'N': case '\321': 921 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44) 922 CASEMBC(0x1e48) 923 regmbc('N'); regmbc('\321'); 924 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147) 925 REGMBC(0x1e44) REGMBC(0x1e48) 926 return; 927 case 'O': case '\322': case '\323': case '\324': case '\325': 928 case '\326': case '\330': 929 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0) 930 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece) 931 regmbc('O'); regmbc('\322'); regmbc('\323'); 932 regmbc('\324'); regmbc('\325'); regmbc('\326'); 933 regmbc('\330'); 934 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150) 935 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea) 936 REGMBC(0x1ec) REGMBC(0x1ece) 937 return; 938 case 
'P': case 0x1e54: case 0x1e56: 939 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56) 940 return; 941 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158) 942 CASEMBC(0x1e58) CASEMBC(0x1e5e) 943 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158) 944 REGMBC(0x1e58) REGMBC(0x1e5e) 945 return; 946 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e) 947 CASEMBC(0x160) CASEMBC(0x1e60) 948 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c) 949 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60) 950 return; 951 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166) 952 CASEMBC(0x1e6a) CASEMBC(0x1e6e) 953 regmbc('T'); REGMBC(0x162) REGMBC(0x164) 954 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e) 955 return; 956 case 'U': case '\331': case '\332': case '\333': case '\334': 957 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e) 958 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3) 959 CASEMBC(0x1ee6) 960 regmbc('U'); regmbc('\331'); regmbc('\332'); 961 regmbc('\333'); regmbc('\334'); 962 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c) 963 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172) 964 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6) 965 return; 966 case 'V': CASEMBC(0x1e7c) 967 regmbc('V'); REGMBC(0x1e7c) 968 return; 969 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82) 970 CASEMBC(0x1e84) CASEMBC(0x1e86) 971 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80) 972 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86) 973 return; 974 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c) 975 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c) 976 return; 977 case 'Y': case '\335': 978 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2) 979 CASEMBC(0x1ef6) CASEMBC(0x1ef8) 980 regmbc('Y'); regmbc('\335'); 981 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e) 982 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8) 983 return; 984 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d) 985 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94) 986 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b) 987 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90) 988 
REGMBC(0x1e94) 989 return; 990 case 'a': case '\340': case '\341': case '\342': 991 case '\343': case '\344': case '\345': 992 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce) 993 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3) 994 regmbc('a'); regmbc('\340'); regmbc('\341'); 995 regmbc('\342'); regmbc('\343'); regmbc('\344'); 996 regmbc('\345'); 997 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105) 998 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1) 999 REGMBC(0x1ea3) 1000 return; 1001 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07) 1002 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07) 1003 return; 1004 case 'c': case '\347': 1005 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d) 1006 regmbc('c'); regmbc('\347'); 1007 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b) 1008 REGMBC(0x10d) 1009 return; 1010 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1d0b) 1011 CASEMBC(0x1e11) 1012 regmbc('d'); REGMBC(0x10f) REGMBC(0x111) 1013 REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11) 1014 return; 1015 case 'e': case '\350': case '\351': case '\352': case '\353': 1016 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119) 1017 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd) 1018 regmbc('e'); regmbc('\350'); regmbc('\351'); 1019 regmbc('\352'); regmbc('\353'); 1020 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117) 1021 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb) 1022 REGMBC(0x1ebd) 1023 return; 1024 case 'f': CASEMBC(0x1e1f) 1025 regmbc('f'); REGMBC(0x1e1f) 1026 return; 1027 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121) 1028 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5) 1029 CASEMBC(0x1e21) 1030 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f) 1031 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5) 1032 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21) 1033 return; 1034 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23) 1035 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96) 1036 regmbc('h'); REGMBC(0x125) REGMBC(0x127) 1037 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29) 1038 REGMBC(0x1e96) 1039 
return; 1040 case 'i': case '\354': case '\355': case '\356': case '\357': 1041 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f) 1042 CASEMBC(0x1d0) CASEMBC(0x1ec9) 1043 regmbc('i'); regmbc('\354'); regmbc('\355'); 1044 regmbc('\356'); regmbc('\357'); 1045 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d) 1046 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9) 1047 return; 1048 case 'j': CASEMBC(0x135) CASEMBC(0x1f0) 1049 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0) 1050 return; 1051 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31) 1052 CASEMBC(0x1e35) 1053 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9) 1054 REGMBC(0x1e31) REGMBC(0x1e35) 1055 return; 1056 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e) 1057 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b) 1058 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c) 1059 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142) 1060 REGMBC(0x1e3b) 1061 return; 1062 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41) 1063 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41) 1064 return; 1065 case 'n': case '\361': 1066 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149) 1067 CASEMBC(0x1e45) CASEMBC(0x1e49) 1068 regmbc('n'); regmbc('\361'); 1069 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148) 1070 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49) 1071 return; 1072 case 'o': case '\362': case '\363': case '\364': case '\365': 1073 case '\366': case '\370': 1074 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1) 1075 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf) 1076 regmbc('o'); regmbc('\362'); regmbc('\363'); 1077 regmbc('\364'); regmbc('\365'); regmbc('\366'); 1078 regmbc('\370'); 1079 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151) 1080 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb) 1081 REGMBC(0x1ed) REGMBC(0x1ecf) 1082 return; 1083 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57) 1084 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57) 1085 return; 1086 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159) 1087 CASEMBC(0x1e59) CASEMBC(0x1e5f) 1088 regmbc('r'); REGMBC(0x155) 
REGMBC(0x157) REGMBC(0x159) 1089 REGMBC(0x1e59) REGMBC(0x1e5f) 1090 return; 1091 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f) 1092 CASEMBC(0x161) CASEMBC(0x1e61) 1093 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d) 1094 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61) 1095 return; 1096 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167) 1097 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97) 1098 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167) 1099 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97) 1100 return; 1101 case 'u': case '\371': case '\372': case '\373': case '\374': 1102 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f) 1103 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4) 1104 CASEMBC(0x1ee7) 1105 regmbc('u'); regmbc('\371'); regmbc('\372'); 1106 regmbc('\373'); regmbc('\374'); 1107 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d) 1108 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173) 1109 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7) 1110 return; 1111 case 'v': CASEMBC(0x1e7d) 1112 regmbc('v'); REGMBC(0x1e7d) 1113 return; 1114 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83) 1115 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98) 1116 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81) 1117 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87) 1118 REGMBC(0x1e98) 1119 return; 1120 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d) 1121 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d) 1122 return; 1123 case 'y': case '\375': case '\377': 1124 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99) 1125 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9) 1126 regmbc('y'); regmbc('\375'); regmbc('\377'); 1127 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99) 1128 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9) 1129 return; 1130 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e) 1131 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95) 1132 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c) 1133 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91) 1134 REGMBC(0x1e95) 1135 return; 1136 } 1137 #endif
Line 891 to 896
891 return 0;
892 }
893
894
895 /*
896 * Skip over a "[]" range.
Line 1174 to 1189
1174 return 0;
1175 }
1176
1177 static void get_cpo_flags __ARGS((void));
1178 static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1179 static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1180
1181 static void
1182 get_cpo_flags()
1183 {
1184 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1185 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1186 }
1187
1188 /*
1189 * Skip over a "[]" range.
Line 901 to 915901 skip_anyof(p) 902 char_u *p; 903 { 904 int cpo_lit; /* 'cpoptions' contains 'l' flag */ 905 int cpo_bsl; /* 'cpoptions' contains '\' flag */ 906 #ifdef FEAT_MBYTE 907 int l; 908 #endif 909 910 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL; 911 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL; 912 913 if (*p == '^') /* Complement of range. */ 914 ++p; 915 if (*p == ']' || *p == '-') Line 1194 to 12031194 skip_anyof(p) 1195 char_u *p; 1196 { 1197 #ifdef FEAT_MBYTE 1198 int l; 1199 #endif 1200 1201 if (*p == '^') /* Complement of range. */ 1202 ++p; 1203 if (*p == ']' || *p == '-')
Line 928 to 936928 mb_ptr_adv(p); 929 } 930 else if (*p == '\\' 931 && !cpo_bsl 932 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL 933 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL))) 934 p += 2; 935 else if (*p == '[') 936 { Line 1216 to 12241216 mb_ptr_adv(p); 1217 } 1218 else if (*p == '\\' 1219 && !reg_cpo_bsl 1220 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL 1221 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL))) 1222 p += 2; 1223 else if (*p == '[') 1224 {
Line 969 to 974
969 mymagic = MAGIC_ON;
970 else
971 mymagic = MAGIC_OFF;
972
973 for (; p[0] != NUL; mb_ptr_adv(p))
974 {
Line 1257 to 1263
1257 mymagic = MAGIC_ON;
1258 else
1259 mymagic = MAGIC_OFF;
1260 get_cpo_flags();
1261
1262 for (; p[0] != NUL; mb_ptr_adv(p))
1263 {
Line 1008 to 10151008 return p; 1009 } 1010 1011 /* 1012 * vim_regcomp() - compile a regular expression into internal code 1013 * Returns the program in allocated space. Returns NULL for an error. 1014 * 1015 * We can't allocate space until we know how big the compiled form will be, Line 1297 to 13081297 return p; 1298 } 1299 1300 static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags)); 1301 static void bt_regfree __ARGS((regprog_T *prog)); 1302 1303 /* 1304 * bt_regcomp() - compile a regular expression into internal code for the 1305 * traditional back track matcher. 1306 * Returns the program in allocated space. Returns NULL for an error. 1307 * 1308 * We can't allocate space until we know how big the compiled form will be,
Line 1028 to 10391028 * of the structure of the compiled regexp. 1029 * "re_flags": RE_MAGIC and/or RE_STRING. 1030 */ 1031 regprog_T * 1032 vim_regcomp(expr, re_flags) 1033 char_u *expr; 1034 int re_flags; 1035 { 1036 regprog_T *r; 1037 char_u *scan; 1038 char_u *longest; 1039 int len; Line 1321 to 13321321 * of the structure of the compiled regexp. 1322 * "re_flags": RE_MAGIC and/or RE_STRING. 1323 */ 1324 static regprog_T * 1325 bt_regcomp(expr, re_flags) 1326 char_u *expr; 1327 int re_flags; 1328 { 1329 bt_regprog_T *r; 1330 char_u *scan; 1331 char_u *longest; 1332 int len;
Line 1060 to 10661060 #endif 1061 1062 /* Allocate space. */ 1063 r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE); 1064 if (r == NULL) 1065 return NULL; 1066 Line 1353 to 13591353 #endif 1354 1355 /* Allocate space. */ 1356 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE); 1357 if (r == NULL) 1358 return NULL; 1359
Line 1155 to 11641155 r->regmlen = len; 1156 } 1157 } 1158 #ifdef DEBUG 1159 regdump(expr, r); 1160 #endif 1161 return r; 1162 } 1163 1164 /* Line 1448 to 14681448 r->regmlen = len; 1449 } 1450 } 1451 #ifdef BT_REGEXP_DUMP 1452 regdump(expr, r); 1453 #endif 1454 r->engine = &bt_regengine; 1455 return (regprog_T *)r; 1456 } 1457 1458 /* 1459 * Free a compiled regexp program, returned by bt_regcomp(). 1460 */ 1461 static void 1462 bt_regfree(prog) 1463 regprog_T *prog; 1464 { 1465 vim_free(prog); 1466 } 1467 1468 /*
Line 1176 to 1181
1176 reg_magic = MAGIC_OFF;
1177 reg_string = (re_flags & RE_STRING);
1178 reg_strict = (re_flags & RE_STRICT);
1179
1180 num_complex_braces = 0;
1181 regnpar = 1;
Line 1480 to 1486
1480 reg_magic = MAGIC_OFF;
1481 reg_string = (re_flags & RE_STRING);
1482 reg_strict = (re_flags & RE_STRICT);
1483 get_cpo_flags();
1484
1485 num_complex_braces = 0;
1486 regnpar = 1;
Line 1205 to 1211
1205 #endif
1206
1207 /*
1208 * reg - regular expression, i.e. main body or parenthesized thing
1209 *
1210 * Caller must absorb opening parenthesis.
1211 *
Line 1510 to 1516
1510 #endif
1511
1512 /*
1513 * Parse regular expression, i.e. main body or parenthesized thing.
1514 *
1515 * Caller must absorb opening parenthesis.
1516 *
Line 1242 to 12481242 { 1243 /* Make a MOPEN node. */ 1244 if (regnpar >= NSUBEXP) 1245 EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); 1246 parno = regnpar; 1247 ++regnpar; 1248 ret = regnode(MOPEN + parno); Line 1547 to 15531547 { 1548 /* Make a MOPEN node. */ 1549 if (regnpar >= NSUBEXP) 1550 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); 1551 parno = regnpar; 1552 ++regnpar; 1553 ret = regnode(MOPEN + parno);
Line 1303 to 13161303 else 1304 #endif 1305 if (paren == REG_NPAREN) 1306 EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL); 1307 else 1308 EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL); 1309 } 1310 else if (paren == REG_NOPAREN && peekchr() != NUL) 1311 { 1312 if (curchr == Magic(')')) 1313 EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL); 1314 else 1315 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ 1316 /* NOTREACHED */ Line 1608 to 16211608 else 1609 #endif 1610 if (paren == REG_NPAREN) 1611 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); 1612 else 1613 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); 1614 } 1615 else if (paren == REG_NOPAREN && peekchr() != NUL) 1616 { 1617 if (curchr == Magic(')')) 1618 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); 1619 else 1620 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ 1621 /* NOTREACHED */
Line 1325 to 1331
1325 }
1326
1327 /*
1328 * Handle one alternative of an | operator.
1329 * Implements the & operator.
1330 */
1331 static char_u *
Line 1630 to 1636
1630 }
1631
1632 /*
1633 * Parse one alternative of an | operator.
1634 * Implements the & operator.
1635 */
1636 static char_u *
Line 1368 to 1374
1368 }
1369
1370 /*
1371 * Handle one alternative of an | or & operator.
1372 * Implements the concatenation operator.
1373 */
1374 static char_u *
Line 1673 to 1679
1673 }
1674
1675 /*
1676 * Parse one alternative of an | or & operator.
1677 * Implements the concatenation operator.
1678 */
1679 static char_u *
Line 1448 to 1454
1448 }
1449
1450 /*
1451 * regpiece - something followed by possible [*+=]
1452 *
1453 * Note that the branching code sequences used for = and the general cases
1454 * of * and + are somewhat optimized: they use the same NOTHING node as
Line 1753 to 1759
1753 }
1754
1755 /*
1756 * Parse something followed by possible [*+=].
1757 *
1758 * Note that the branching code sequences used for = and the general cases
1759 * of * and + are somewhat optimized: they use the same NOTHING node as
Line 1515 to 15211515 case Magic('@'): 1516 { 1517 int lop = END; 1518 1519 switch (no_Magic(getchr())) 1520 { 1521 case '=': lop = MATCH; break; /* \@= */ Line 1820 to 18281820 case Magic('@'): 1821 { 1822 int lop = END; 1823 int nr; 1824 1825 nr = getdecchrs(); 1826 switch (no_Magic(getchr())) 1827 { 1828 case '=': lop = MATCH; break; /* \@= */
Line 1528 to 15341528 } 1529 } 1530 if (lop == END) 1531 EMSG_M_RET_NULL(_("E59: invalid character after %s@"), 1532 reg_magic == MAGIC_ALL); 1533 /* Look behind must match with behind_pos. */ 1534 if (lop == BEHIND || lop == NOBEHIND) Line 1835 to 18411835 } 1836 } 1837 if (lop == END) 1838 EMSG2_RET_NULL(_("E59: invalid character after %s@"), 1839 reg_magic == MAGIC_ALL); 1840 /* Look behind must match with behind_pos. */ 1841 if (lop == BEHIND || lop == NOBEHIND)
Line 1537 to 15431537 *flagp |= HASLOOKBH; 1538 } 1539 regtail(ret, regnode(END)); /* operand ends */ 1540 reginsert(lop, ret); 1541 break; 1542 } 1543 Line 1844 to 18571844 *flagp |= HASLOOKBH; 1845 } 1846 regtail(ret, regnode(END)); /* operand ends */ 1847 if (lop == BEHIND || lop == NOBEHIND) 1848 { 1849 if (nr < 0) 1850 nr = 0; /* no limit is same as zero limit */ 1851 reginsert_nr(lop, nr, ret); 1852 } 1853 else 1854 reginsert(lop, ret); 1855 break; 1856 } 1857
Line 1562 to 15681562 else 1563 { 1564 if (num_complex_braces >= 10) 1565 EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"), 1566 reg_magic == MAGIC_ALL); 1567 reginsert(BRACE_COMPLEX + num_complex_braces, ret); 1568 regoptail(ret, regnode(BACK)); Line 1876 to 18821876 else 1877 { 1878 if (num_complex_braces >= 10) 1879 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), 1880 reg_magic == MAGIC_ALL); 1881 reginsert(BRACE_COMPLEX + num_complex_braces, ret); 1882 regoptail(ret, regnode(BACK));
Line 1588 to 15961588 1589 return ret; 1590 } 1591 1592 /* 1593 * regatom - the lowest level 1594 * 1595 * Optimization: gobbles an entire sequence of ordinary characters so that 1596 * it can turn them into a single node, which is smaller to store and Line 1902 to 19221902 1903 return ret; 1904 } 1905 1906 /* When making changes to classchars also change nfa_classcodes. */ 1907 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; 1908 static int classcodes[] = { 1909 ANY, IDENT, SIDENT, KWORD, SKWORD, 1910 FNAME, SFNAME, PRINT, SPRINT, 1911 WHITE, NWHITE, DIGIT, NDIGIT, 1912 HEX, NHEX, OCTAL, NOCTAL, 1913 WORD, NWORD, HEAD, NHEAD, 1914 ALPHA, NALPHA, LOWER, NLOWER, 1915 UPPER, NUPPER 1916 }; 1917 1918 /* 1919 * Parse the lowest level. 1920 * 1921 * Optimization: gobbles an entire sequence of ordinary characters so that 1922 * it can turn them into a single node, which is smaller to store and
Line 1602 to 16251602 { 1603 char_u *ret; 1604 int flags; 1605 int cpo_lit; /* 'cpoptions' contains 'l' flag */ 1606 int cpo_bsl; /* 'cpoptions' contains '\' flag */ 1607 int c; 1608 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; 1609 static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD, 1610 FNAME, SFNAME, PRINT, SPRINT, 1611 WHITE, NWHITE, DIGIT, NDIGIT, 1612 HEX, NHEX, OCTAL, NOCTAL, 1613 WORD, NWORD, HEAD, NHEAD, 1614 ALPHA, NALPHA, LOWER, NLOWER, 1615 UPPER, NUPPER 1616 }; 1617 char_u *p; 1618 int extra = 0; 1619 1620 *flagp = WORST; /* Tentatively. */ 1621 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL; 1622 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL; 1623 1624 c = getchr(); 1625 switch (c) Line 1928 to 19381928 { 1929 char_u *ret; 1930 int flags; 1931 int c; 1932 char_u *p; 1933 int extra = 0; 1934 1935 *flagp = WORST; /* Tentatively. */ 1936 1937 c = getchr(); 1938 switch (c)
Line 1826 to 18321826 { 1827 #ifdef FEAT_SYN_HL 1828 case '(': if (reg_do_extmatch != REX_SET) 1829 EMSG_RET_NULL(_("E66: \\z( not allowed here")); 1830 if (one_exactly) 1831 EMSG_ONE_RET_NULL; 1832 ret = reg(REG_ZPAREN, &flags); Line 2139 to 21452139 { 2140 #ifdef FEAT_SYN_HL 2141 case '(': if (reg_do_extmatch != REX_SET) 2142 EMSG_RET_NULL(_(e_z_not_allowed)); 2143 if (one_exactly) 2144 EMSG_ONE_RET_NULL; 2145 ret = reg(REG_ZPAREN, &flags);
Line 1845 to 18511845 case '7': 1846 case '8': 1847 case '9': if (reg_do_extmatch != REX_USE) 1848 EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here")); 1849 ret = regnode(ZREF + c - '0'); 1850 re_has_z = REX_USE; 1851 break; Line 2158 to 21642158 case '7': 2159 case '8': 2160 case '9': if (reg_do_extmatch != REX_USE) 2161 EMSG_RET_NULL(_(e_z1_not_allowed)); 2162 ret = regnode(ZREF + c - '0'); 2163 re_has_z = REX_USE; 2164 break;
Line 1909 to 19151909 while ((c = getchr()) != ']') 1910 { 1911 if (c == NUL) 1912 EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["), 1913 reg_magic == MAGIC_ALL); 1914 br = regnode(BRANCH); 1915 if (ret == NULL) Line 2222 to 22282222 while ((c = getchr()) != ']') 2223 { 2224 if (c == NUL) 2225 EMSG2_RET_NULL(_(e_missing_sb), 2226 reg_magic == MAGIC_ALL); 2227 br = regnode(BRANCH); 2228 if (ret == NULL)
Line 1925 to 19311925 return NULL; 1926 } 1927 if (ret == NULL) 1928 EMSG_M_RET_NULL(_("E70: Empty %s%%[]"), 1929 reg_magic == MAGIC_ALL); 1930 lastbranch = regnode(BRANCH); 1931 br = regnode(NOTHING); Line 2238 to 22442238 return NULL; 2239 } 2240 if (ret == NULL) 2241 EMSG2_RET_NULL(_(e_empty_sb), 2242 reg_magic == MAGIC_ALL); 2243 lastbranch = regnode(BRANCH); 2244 br = regnode(NOTHING);
Line 1969 to 19751969 } 1970 1971 if (i < 0) 1972 EMSG_M_RET_NULL( 1973 _("E678: Invalid character after %s%%[dxouU]"), 1974 reg_magic == MAGIC_ALL); 1975 #ifdef FEAT_MBYTE Line 2282 to 22882282 } 2283 2284 if (i < 0) 2285 EMSG2_RET_NULL( 2286 _("E678: Invalid character after %s%%[dxouU]"), 2287 reg_magic == MAGIC_ALL); 2288 #ifdef FEAT_MBYTE
Line 2041 to 20472041 } 2042 } 2043 2044 EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"), 2045 reg_magic == MAGIC_ALL); 2046 } 2047 } Line 2354 to 23602354 } 2355 } 2356 2357 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), 2358 reg_magic == MAGIC_ALL); 2359 } 2360 }
Line 2112 to 21182112 } 2113 2114 /* Handle \o40, \x20 and \u20AC style sequences */ 2115 if (endc == '\\' && !cpo_lit && !cpo_bsl) 2116 endc = coll_get_char(); 2117 2118 if (startc > endc) Line 2425 to 24312425 } 2426 2427 /* Handle \o40, \x20 and \u20AC style sequences */ 2428 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl) 2429 endc = coll_get_char(); 2430 2431 if (startc > endc)
Line 2154 to 21622154 * Posix doesn't recognize backslash at all. 2155 */ 2156 else if (*regparse == '\\' 2157 && !cpo_bsl 2158 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL 2159 || (!cpo_lit 2160 && vim_strchr(REGEXP_ABBR, 2161 regparse[1]) != NULL))) 2162 { Line 2467 to 24752467 * Posix doesn't recognize backslash at all. 2468 */ 2469 else if (*regparse == '\\' 2470 && !reg_cpo_bsl 2471 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL 2472 || (!reg_cpo_lit 2473 && vim_strchr(REGEXP_ABBR, 2474 regparse[1]) != NULL))) 2475 {
Line 2166 to 21782166 /* '\n' in range: also match NL */ 2167 if (ret != JUST_CALC_SIZE) 2168 { 2169 if (*ret == ANYBUT) 2170 *ret = ANYBUT + ADD_NL; 2171 else if (*ret == ANYOF) 2172 *ret = ANYOF + ADD_NL; 2173 /* else: must have had a \n already */ 2174 } 2175 *flagp |= HASNL; 2176 regparse++; 2177 startc = -1; 2178 } Line 2479 to 24932479 /* '\n' in range: also match NL */ 2480 if (ret != JUST_CALC_SIZE) 2481 { 2482 /* Using \n inside [^] does not change what 2483 * matches. "[^\n]" is the same as ".". */ 2484 if (*ret == ANYOF) 2485 { 2486 *ret = ANYOF + ADD_NL; 2487 *flagp |= HASNL; 2488 } 2489 /* else: must have had a \n already */ 2490 } 2491 regparse++; 2492 startc = -1; 2493 }
Line 2334 to 23412334 break; 2335 } 2336 else if (reg_strict) 2337 EMSG_M_RET_NULL(_("E769: Missing ] after %s["), 2338 reg_magic > MAGIC_OFF); 2339 } 2340 /* FALLTHROUGH */ 2341 Line 2649 to 26552649 break; 2650 } 2651 else if (reg_strict) 2652 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); 2653 } 2654 /* FALLTHROUGH */ 2655
Line 2426 to 24322426 #endif 2427 2428 /* 2429 * emit a node 2430 * Return pointer to generated code. 2431 */ 2432 static char_u * Line 2740 to 27462740 #endif 2741 2742 /* 2743 * Emit a node. 2744 * Return pointer to generated code. 2745 */ 2746 static char_u *
Line 2468 to 24732468 regmbc(c) 2469 int c; 2470 { 2471 if (regcode == JUST_CALC_SIZE) 2472 regsize += (*mb_char2len)(c); 2473 else Line 2782 to 27892782 regmbc(c) 2783 int c; 2784 { 2785 if (!has_mbyte && c > 0xff) 2786 return; 2787 if (regcode == JUST_CALC_SIZE) 2788 regsize += (*mb_char2len)(c); 2789 else
Line 2476 to 24822476 #endif 2477 2478 /* 2479 * reginsert - insert an operator in front of already-emitted operand 2480 * 2481 * Means relocating the operand. 2482 */ Line 2792 to 27982792 #endif 2793 2794 /* 2795 * Insert an operator in front of already-emitted operand 2796 * 2797 * Means relocating the operand. 2798 */
Line 2507 to 25132507 } 2508 2509 /* 2510 * reginsert_limits - insert an operator in front of already-emitted operand. 2511 * The operator has the given limit values as operands. Also set next pointer. 2512 * 2513 * Means relocating the operand. Line 2823 to 28612823 } 2824 2825 /* 2826 * Insert an operator in front of already-emitted operand. 2827 * Add a number to the operator. 2828 */ 2829 static void 2830 reginsert_nr(op, val, opnd) 2831 int op; 2832 long val; 2833 char_u *opnd; 2834 { 2835 char_u *src; 2836 char_u *dst; 2837 char_u *place; 2838 2839 if (regcode == JUST_CALC_SIZE) 2840 { 2841 regsize += 7; 2842 return; 2843 } 2844 src = regcode; 2845 regcode += 7; 2846 dst = regcode; 2847 while (src > opnd) 2848 *--dst = *--src; 2849 2850 place = opnd; /* Op node, where operand used to be. */ 2851 *place++ = op; 2852 *place++ = NUL; 2853 *place++ = NUL; 2854 place = re_put_long(place, (long_u)val); 2855 } 2856 2857 /* 2858 * Insert an operator in front of already-emitted operand. 2859 * The operator has the given limit values as operands. Also set next pointer. 2860 * 2861 * Means relocating the operand.
Line 2559 to 25652559 } 2560 2561 /* 2562 * regtail - set the next-pointer at the end of a node chain 2563 */ 2564 static void 2565 regtail(p, val) Line 2907 to 29132907 } 2908 2909 /* 2910 * Set the next-pointer at the end of a node chain. 2911 */ 2912 static void 2913 regtail(p, val)
Line 2588 to 25942588 else 2589 offset = (int)(val - scan); 2590 /* When the offset uses more than 16 bits it can no longer fit in the two 2591 * bytes avaliable. Use a global flag to avoid having to check return 2592 * values in too many places. */ 2593 if (offset > 0xffff) 2594 reg_toolong = TRUE; Line 2936 to 29422936 else 2937 offset = (int)(val - scan); 2938 /* When the offset uses more than 16 bits it can no longer fit in the two 2939 * bytes available. Use a global flag to avoid having to check return 2940 * values in too many places. */ 2941 if (offset > 0xffff) 2942 reg_toolong = TRUE;
Line 2600 to 26062600 } 2601 2602 /* 2603 * regoptail - regtail on item after a BRANCH; nop if none 2604 */ 2605 static void 2606 regoptail(p, val) Line 2948 to 29542948 } 2949 2950 /* 2951 * Like regtail, on item after a BRANCH; nop if none. 2952 */ 2953 static void 2954 regoptail(p, val)
Line 2616 to 26372616 } 2617 2618 /* 2619 * getchr() - get the next character from the pattern. We know about 2620 * magic and such, so therefore we need a lexical analyzer. 2621 */ 2622 2623 /* static int curchr; */ 2624 static int prevprevchr; 2625 static int prevchr; 2626 static int nextchr; /* used for ungetchr() */ 2627 /* 2628 * Note: prevchr is sometimes -1 when we are not at the start, 2629 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was 2630 * taken to be magic -- webb 2631 */ 2632 static int at_start; /* True when on the first character */ 2633 static int prev_at_start; /* True when on the second character */ 2634 2635 static void 2636 initchr(str) 2637 char_u *str; Line 2964 to 29782964 } 2965 2966 /* 2967 * Functions for getting characters from the regexp input. 2968 */ 2969 2970 static int at_start; /* True when on the first character */ 2971 static int prev_at_start; /* True when on the second character */ 2972 2973 /* 2974 * Start parsing at "str". 2975 */ 2976 static void 2977 initchr(str) 2978 char_u *str;
Line 2643 to 26482643 prev_at_start = FALSE; 2644 } 2645 2646 static int 2647 peekchr() 2648 { Line 2984 to 30302984 prev_at_start = FALSE; 2985 } 2986 2987 /* 2988 * Save the current parse state, so that it can be restored and parsing 2989 * starts in the same state again. 2990 */ 2991 static void 2992 save_parse_state(ps) 2993 parse_state_T *ps; 2994 { 2995 ps->regparse = regparse; 2996 ps->prevchr_len = prevchr_len; 2997 ps->curchr = curchr; 2998 ps->prevchr = prevchr; 2999 ps->prevprevchr = prevprevchr; 3000 ps->nextchr = nextchr; 3001 ps->at_start = at_start; 3002 ps->prev_at_start = prev_at_start; 3003 ps->regnpar = regnpar; 3004 } 3005 3006 /* 3007 * Restore a previously saved parse state. 3008 */ 3009 static void 3010 restore_parse_state(ps) 3011 parse_state_T *ps; 3012 { 3013 regparse = ps->regparse; 3014 prevchr_len = ps->prevchr_len; 3015 curchr = ps->curchr; 3016 prevchr = ps->prevchr; 3017 prevprevchr = ps->prevprevchr; 3018 nextchr = ps->nextchr; 3019 at_start = ps->at_start; 3020 prev_at_start = ps->prev_at_start; 3021 regnpar = ps->regnpar; 3022 } 3023 3024 3025 /* 3026 * Get the next character without advancing. 3027 */ 3028 static int 3029 peekchr() 3030 {
Line 2851 to 28562851 prevprevchr = prpr; 2852 } 2853 2854 static int 2855 getchr() 2856 { Line 3233 to 32423233 prevprevchr = prpr; 3234 } 3235 3236 /* 3237 * Get the next character from the pattern. We know about magic and such, so 3238 * therefore we need a lexical analyzer. 3239 */ 3240 static int 3241 getchr() 3242 {
Line 2910 to 29162910 } 2911 2912 /* 2913 * get and return the value of the decimal string immediately after the 2914 * current position. Return -1 for invalid. Consumes all digits. 2915 */ 2916 static int Line 3296 to 33023296 } 3297 3298 /* 3299 * Get and return the value of the decimal string immediately after the 3300 * current position. Return -1 for invalid. Consumes all digits. 3301 */ 3302 static int
Line 2928 to 29332928 nr *= 10; 2929 nr += c - '0'; 2930 ++regparse; 2931 } 2932 2933 if (i == 0) Line 3314 to 33203314 nr *= 10; 3315 nr += c - '0'; 3316 ++regparse; 3317 curchr = -1; /* no longer valid */ 3318 } 3319 3320 if (i == 0)
Line 3105 to 31123105 } regbehind_T; 3106 3107 static char_u *reg_getline __ARGS((linenr_T lnum)); 3108 static long vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); 3109 static long regtry __ARGS((regprog_T *prog, colnr_T col)); 3110 static void cleanup_subexpr __ARGS((void)); 3111 #ifdef FEAT_SYN_HL 3112 static void cleanup_zsubexpr __ARGS((void)); Line 3492 to 34993492 } regbehind_T; 3493 3494 static char_u *reg_getline __ARGS((linenr_T lnum)); 3495 static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); 3496 static long regtry __ARGS((bt_regprog_T *prog, colnr_T col)); 3497 static void cleanup_subexpr __ARGS((void)); 3498 #ifdef FEAT_SYN_HL 3499 static void cleanup_zsubexpr __ARGS((void));
Line 3132 to 31373132 *(pp) = (savep)->se_u.ptr; } 3133 3134 static int re_num_cmp __ARGS((long_u val, char_u *scan)); 3135 static int regmatch __ARGS((char_u *prog)); 3136 static int regrepeat __ARGS((char_u *p, long maxcount)); 3137 Line 3519 to 35253519 *(pp) = (savep)->se_u.ptr; } 3520 3521 static int re_num_cmp __ARGS((long_u val, char_u *scan)); 3522 static int match_with_backref __ARGS((linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen)); 3523 static int regmatch __ARGS((char_u *prog)); 3524 static int regrepeat __ARGS((char_u *p, long maxcount)); 3525
Line 3163 to 31693163 /* 3164 * Sometimes need to save a copy of a line. Since alloc()/free() is very 3165 * slow, we keep one allocated piece of memory and only re-allocate it when 3166 * it's too small. It's freed in vim_regexec_both() when finished. 3167 */ 3168 static char_u *reg_tofree = NULL; 3169 static unsigned reg_tofreelen; Line 3551 to 35573551 /* 3552 * Sometimes need to save a copy of a line. Since alloc()/free() is very 3553 * slow, we keep one allocated piece of memory and only re-allocate it when 3554 * it's too small. It's freed in bt_regexec_both() when finished. 3555 */ 3556 static char_u *reg_tofree = NULL; 3557 static unsigned reg_tofreelen;
Line 3180 to 31863180 * reg_startpos <invalid> reg_mmatch->startpos 3181 * reg_endpos <invalid> reg_mmatch->endpos 3182 * reg_win NULL window in which to search 3183 * reg_buf <invalid> buffer in which to search 3184 * reg_firstlnum <invalid> first line in which to search 3185 * reg_maxline 0 last line nr 3186 * reg_line_lbr FALSE or TRUE FALSE Line 3568 to 35743568 * reg_startpos <invalid> reg_mmatch->startpos 3569 * reg_endpos <invalid> reg_mmatch->endpos 3570 * reg_win NULL window in which to search 3571 * reg_buf curbuf buffer in which to search 3572 * reg_firstlnum <invalid> first line in which to search 3573 * reg_maxline 0 last line nr 3574 * reg_line_lbr FALSE or TRUE FALSE
Line 3320 to 33253320 3321 /* TRUE if using multi-line regexp. */ 3322 #define REG_MULTI (reg_match == NULL) 3323 3324 /* 3325 * Match a regexp against a string. Line 3708 to 37153708 3709 /* TRUE if using multi-line regexp. */ 3710 #define REG_MULTI (reg_match == NULL) 3711 3712 static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); 3713 3714 /* 3715 * Match a regexp against a string.
Line 3328 to 33353328 * 3329 * Return TRUE if there is a match, FALSE if not. 3330 */ 3331 int 3332 vim_regexec(rmp, line, col) 3333 regmatch_T *rmp; 3334 char_u *line; /* string to match against */ 3335 colnr_T col; /* column to start looking for match */ Line 3718 to 37253718 * 3719 * Return TRUE if there is a match, FALSE if not. 3720 */ 3721 static int 3722 bt_regexec(rmp, line, col) 3723 regmatch_T *rmp; 3724 char_u *line; /* string to match against */ 3725 colnr_T col; /* column to start looking for match */
Line 3338 to 33593338 reg_mmatch = NULL; 3339 reg_maxline = 0; 3340 reg_line_lbr = FALSE; 3341 reg_win = NULL; 3342 ireg_ic = rmp->rm_ic; 3343 #ifdef FEAT_MBYTE 3344 ireg_icombine = FALSE; 3345 #endif 3346 ireg_maxcol = 0; 3347 return (vim_regexec_both(line, col, NULL) != 0); 3348 } 3349 3350 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ 3351 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) 3352 /* 3353 * Like vim_regexec(), but consider a "\n" in "line" to be a line break. 3354 */ 3355 int 3356 vim_regexec_nl(rmp, line, col) 3357 regmatch_T *rmp; 3358 char_u *line; /* string to match against */ 3359 colnr_T col; /* column to start looking for match */ Line 3728 to 37533728 reg_mmatch = NULL; 3729 reg_maxline = 0; 3730 reg_line_lbr = FALSE; 3731 reg_buf = curbuf; 3732 reg_win = NULL; 3733 ireg_ic = rmp->rm_ic; 3734 #ifdef FEAT_MBYTE 3735 ireg_icombine = FALSE; 3736 #endif 3737 ireg_maxcol = 0; 3738 return (bt_regexec_both(line, col, NULL) != 0); 3739 } 3740 3741 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ 3742 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) 3743 3744 static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); 3745 3746 /* 3747 * Like vim_regexec(), but consider a "\n" in "line" to be a line break. 3748 */ 3749 static int 3750 bt_regexec_nl(rmp, line, col) 3751 regmatch_T *rmp; 3752 char_u *line; /* string to match against */ 3753 colnr_T col; /* column to start looking for match */
Line 3362 to 33763362 reg_mmatch = NULL; 3363 reg_maxline = 0; 3364 reg_line_lbr = TRUE; 3365 reg_win = NULL; 3366 ireg_ic = rmp->rm_ic; 3367 #ifdef FEAT_MBYTE 3368 ireg_icombine = FALSE; 3369 #endif 3370 ireg_maxcol = 0; 3371 return (vim_regexec_both(line, col, NULL) != 0); 3372 } 3373 #endif 3374 3375 /* 3376 * Match a regexp against multiple lines. Line 3756 to 37733756 reg_mmatch = NULL; 3757 reg_maxline = 0; 3758 reg_line_lbr = TRUE; 3759 reg_buf = curbuf; 3760 reg_win = NULL; 3761 ireg_ic = rmp->rm_ic; 3762 #ifdef FEAT_MBYTE 3763 ireg_icombine = FALSE; 3764 #endif 3765 ireg_maxcol = 0; 3766 return (bt_regexec_both(line, col, NULL) != 0); 3767 } 3768 #endif 3769 3770 static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); 3771 3772 /* 3773 * Match a regexp against multiple lines.
Line 3380 to 33873380 * Return zero if there is no match. Return number of lines contained in the 3381 * match otherwise. 3382 */ 3383 long 3384 vim_regexec_multi(rmp, win, buf, lnum, col, tm) 3385 regmmatch_T *rmp; 3386 win_T *win; /* window in which to search or NULL */ 3387 buf_T *buf; /* buffer in which to search */ Line 3777 to 37843777 * Return zero if there is no match. Return number of lines contained in the 3778 * match otherwise. 3779 */ 3780 static long 3781 bt_regexec_multi(rmp, win, buf, lnum, col, tm) 3782 regmmatch_T *rmp; 3783 win_T *win; /* window in which to search or NULL */ 3784 buf_T *buf; /* buffer in which to search */
Line 3390 to 33963390 proftime_T *tm; /* timeout limit or NULL */ 3391 { 3392 long r; 3393 buf_T *save_curbuf = curbuf; 3394 3395 reg_match = NULL; 3396 reg_mmatch = rmp; Line 3787 to 37923787 proftime_T *tm; /* timeout limit or NULL */ 3788 { 3789 long r; 3790 3791 reg_match = NULL; 3792 reg_mmatch = rmp;
Line 3405 to 34143405 #endif 3406 ireg_maxcol = rmp->rmm_maxcol; 3407 3408 /* Need to switch to buffer "buf" to make vim_iswordc() work. */ 3409 curbuf = buf; 3410 r = vim_regexec_both(NULL, col, tm); 3411 curbuf = save_curbuf; 3412 3413 return r; 3414 } Line 3801 to 38073801 #endif 3802 ireg_maxcol = rmp->rmm_maxcol; 3803 3804 r = bt_regexec_both(NULL, col, tm); 3805 3806 return r; 3807 }
Line 3418 to 34293418 * lines ("line" is NULL, use reg_getline()). 3419 */ 3420 static long 3421 vim_regexec_both(line, col, tm) 3422 char_u *line; 3423 colnr_T col; /* column to start looking for match */ 3424 proftime_T *tm UNUSED; /* timeout limit or NULL */ 3425 { 3426 regprog_T *prog; 3427 char_u *s; 3428 long retval = 0L; 3429 Line 3811 to 38223811 * lines ("line" is NULL, use reg_getline()). 3812 */ 3813 static long 3814 bt_regexec_both(line, col, tm) 3815 char_u *line; 3816 colnr_T col; /* column to start looking for match */ 3817 proftime_T *tm UNUSED; /* timeout limit or NULL */ 3818 { 3819 bt_regprog_T *prog; 3820 char_u *s; 3821 long retval = 0L; 3822
Line 3449 to 34623449 3450 if (REG_MULTI) 3451 { 3452 prog = reg_mmatch->regprog; 3453 line = reg_getline((linenr_T)0); 3454 reg_startpos = reg_mmatch->startpos; 3455 reg_endpos = reg_mmatch->endpos; 3456 } 3457 else 3458 { 3459 prog = reg_match->regprog; 3460 reg_startp = reg_match->startp; 3461 reg_endp = reg_match->endp; 3462 } Line 3842 to 38553842 3843 if (REG_MULTI) 3844 { 3845 prog = (bt_regprog_T *)reg_mmatch->regprog; 3846 line = reg_getline((linenr_T)0); 3847 reg_startpos = reg_mmatch->startpos; 3848 reg_endpos = reg_mmatch->endpos; 3849 } 3850 else 3851 { 3852 prog = (bt_regprog_T *)reg_match->regprog; 3853 reg_startp = reg_match->startp; 3854 reg_endp = reg_match->endp; 3855 }
Line 3538 to 35433538 3539 regline = line; 3540 reglnum = 0; 3541 3542 /* Simplest case: Anchored match need be tried only once. */ 3543 if (prog->reganch) Line 3931 to 39373931 3932 regline = line; 3933 reglnum = 0; 3934 reg_toolong = FALSE; 3935 3936 /* Simplest case: Anchored match need be tried only once. */ 3937 if (prog->reganch)
Line 3697 to 37033697 */ 3698 static long 3699 regtry(prog, col) 3700 regprog_T *prog; 3701 colnr_T col; 3702 { 3703 reginput = regline + col; Line 4091 to 40974091 */ 4092 static long 4093 regtry(prog, col) 4094 bt_regprog_T *prog; 4095 colnr_T col; 4096 { 4097 reginput = regline + col;
Line 3781 to 37923781 reg_prev_class() 3782 { 3783 if (reginput > regline) 3784 return mb_get_class(reginput - 1 3785 - (*mb_head_off)(regline, reginput - 1)); 3786 return -1; 3787 } 3788 3789 #endif 3790 #define ADVANCE_REGINPUT() mb_ptr_adv(reginput) 3791 3792 /* Line 4175 to 42654175 reg_prev_class() 4176 { 4177 if (reginput > regline) 4178 return mb_get_class_buf(reginput - 1 4179 - (*mb_head_off)(regline, reginput - 1), reg_buf); 4180 return -1; 4181 } 4182 4183 #endif 4184 #ifdef FEAT_VISUAL 4185 static int reg_match_visual __ARGS((void)); 4186 4187 /* 4188 * Return TRUE if the current reginput position matches the Visual area. 4189 */ 4190 static int 4191 reg_match_visual() 4192 { 4193 pos_T top, bot; 4194 linenr_T lnum; 4195 colnr_T col; 4196 win_T *wp = reg_win == NULL ? curwin : reg_win; 4197 int mode; 4198 colnr_T start, end; 4199 colnr_T start2, end2; 4200 colnr_T cols; 4201 4202 /* Check if the buffer is the current buffer. */ 4203 if (reg_buf != curbuf || VIsual.lnum == 0) 4204 return FALSE; 4205 4206 if (VIsual_active) 4207 { 4208 if (lt(VIsual, wp->w_cursor)) 4209 { 4210 top = VIsual; 4211 bot = wp->w_cursor; 4212 } 4213 else 4214 { 4215 top = wp->w_cursor; 4216 bot = VIsual; 4217 } 4218 mode = VIsual_mode; 4219 } 4220 else 4221 { 4222 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end)) 4223 { 4224 top = curbuf->b_visual.vi_start; 4225 bot = curbuf->b_visual.vi_end; 4226 } 4227 else 4228 { 4229 top = curbuf->b_visual.vi_end; 4230 bot = curbuf->b_visual.vi_start; 4231 } 4232 mode = curbuf->b_visual.vi_mode; 4233 } 4234 lnum = reglnum + reg_firstlnum; 4235 if (lnum < top.lnum || lnum > bot.lnum) 4236 return FALSE; 4237 4238 if (mode == 'v') 4239 { 4240 col = (colnr_T)(reginput - regline); 4241 if ((lnum == top.lnum && col < top.col) 4242 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e'))) 4243 return FALSE; 4244 } 4245 else if (mode == Ctrl_V) 4246 { 4247 getvvcol(wp, &top, &start, NULL, &end); 4248 getvvcol(wp, &bot, &start2, NULL, 
&end2); 4249 if (start2 < start) 4250 start = start2; 4251 if (end2 > end) 4252 end = end2; 4253 if (top.col == MAXCOL || bot.col == MAXCOL) 4254 end = MAXCOL; 4255 cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline)); 4256 if (cols < start || cols > end - (*p_sel == 'e')) 4257 return FALSE; 4258 } 4259 return TRUE; 4260 } 4261 #endif 4262 4263 #define ADVANCE_REGINPUT() mb_ptr_adv(reginput) 4264 4265 /*
Line 3829 to 38353829 #define RA_NOMATCH 5 /* didn't match */ 3830 3831 /* Make "regstack" and "backpos" empty. They are allocated and freed in 3832 * vim_regexec_both() to reduce malloc()/free() calls. */ 3833 regstack.ga_len = 0; 3834 backpos.ga_len = 0; 3835 Line 4302 to 43084302 #define RA_NOMATCH 5 /* didn't match */ 4303 4304 /* Make "regstack" and "backpos" empty. They are allocated and freed in 4305 * bt_regexec_both() to reduce malloc()/free() calls. */ 4306 regstack.ga_len = 0; 4307 backpos.ga_len = 0; 4308
Line 3838 to 38513838 */ 3839 for (;;) 3840 { 3841 /* Some patterns my cause a long time to match, even though they are not 3842 * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */ 3843 fast_breakcheck(); 3844 3845 #ifdef DEBUG 3846 if (scan != NULL && regnarrate) 3847 { 3848 mch_errmsg(regprop(scan)); 3849 mch_errmsg("(\n"); 3850 } 3851 #endif Line 4311 to 43244311 */ 4312 for (;;) 4313 { 4314 /* Some patterns may cause a long time to match, even though they are not 4315 * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */ 4316 fast_breakcheck(); 4317 4318 #ifdef DEBUG 4319 if (scan != NULL && regnarrate) 4320 { 4321 mch_errmsg((char *)regprop(scan)); 4322 mch_errmsg("(\n"); 4323 } 4324 #endif
Line 3866 to 38723866 #ifdef DEBUG 3867 if (regnarrate) 3868 { 3869 mch_errmsg(regprop(scan)); 3870 mch_errmsg("...\n"); 3871 # ifdef FEAT_SYN_HL 3872 if (re_extmatch_in != NULL) Line 4339 to 43454339 #ifdef DEBUG 4340 if (regnarrate) 4341 { 4342 mch_errmsg((char *)regprop(scan)); 4343 mch_errmsg("...\n"); 4344 # ifdef FEAT_SYN_HL 4345 if (re_extmatch_in != NULL)
Line 3878 to 38843878 { 3879 mch_errmsg(" \""); 3880 if (re_extmatch_in->matches[i] != NULL) 3881 mch_errmsg(re_extmatch_in->matches[i]); 3882 mch_errmsg("\"\n"); 3883 } 3884 } Line 4351 to 43574351 { 4352 mch_errmsg(" \""); 4353 if (re_extmatch_in->matches[i] != NULL) 4354 mch_errmsg((char *)re_extmatch_in->matches[i]); 4355 mch_errmsg("\"\n"); 4356 } 4357 }
Line 3944 to 39593944 break; 3945 3946 case RE_MARK: 3947 /* Compare the mark position to the match position. NOTE: Always 3948 * uses the current buffer. */ 3949 { 3950 int mark = OPERAND(scan)[0]; 3951 int cmp = OPERAND(scan)[1]; 3952 pos_T *pos; 3953 3954 pos = getmark(mark, FALSE); 3955 if (pos == NULL /* mark doesn't exist */ 3956 || pos->lnum <= 0 /* mark isn't set (in curbuf) */ 3957 || (pos->lnum == reglnum + reg_firstlnum 3958 ? (pos->col == (colnr_T)(reginput - regline) 3959 ? (cmp == '<' || cmp == '>') Line 4417 to 44314417 break; 4418 4419 case RE_MARK: 4420 /* Compare the mark position to the match position. */ 4421 { 4422 int mark = OPERAND(scan)[0]; 4423 int cmp = OPERAND(scan)[1]; 4424 pos_T *pos; 4425 4426 pos = getmark_buf(reg_buf, mark, FALSE); 4427 if (pos == NULL /* mark doesn't exist */ 4428 || pos->lnum <= 0 /* mark isn't set in reg_buf */ 4429 || (pos->lnum == reglnum + reg_firstlnum 4430 ? (pos->col == (colnr_T)(reginput - regline) 4431 ? (cmp == '<' || cmp == '>')
Line 3969 to 40483969 3970 case RE_VISUAL: 3971 #ifdef FEAT_VISUAL 3972 /* Check if the buffer is the current buffer. and whether the 3973 * position is inside the Visual area. */ 3974 if (reg_buf != curbuf || VIsual.lnum == 0) 3975 status = RA_NOMATCH; 3976 else 3977 { 3978 pos_T top, bot; 3979 linenr_T lnum; 3980 colnr_T col; 3981 win_T *wp = reg_win == NULL ? curwin : reg_win; 3982 int mode; 3983 3984 if (VIsual_active) 3985 { 3986 if (lt(VIsual, wp->w_cursor)) 3987 { 3988 top = VIsual; 3989 bot = wp->w_cursor; 3990 } 3991 else 3992 { 3993 top = wp->w_cursor; 3994 bot = VIsual; 3995 } 3996 mode = VIsual_mode; 3997 } 3998 else 3999 { 4000 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end)) 4001 { 4002 top = curbuf->b_visual.vi_start; 4003 bot = curbuf->b_visual.vi_end; 4004 } 4005 else 4006 { 4007 top = curbuf->b_visual.vi_end; 4008 bot = curbuf->b_visual.vi_start; 4009 } 4010 mode = curbuf->b_visual.vi_mode; 4011 } 4012 lnum = reglnum + reg_firstlnum; 4013 col = (colnr_T)(reginput - regline); 4014 if (lnum < top.lnum || lnum > bot.lnum) 4015 status = RA_NOMATCH; 4016 else if (mode == 'v') 4017 { 4018 if ((lnum == top.lnum && col < top.col) 4019 || (lnum == bot.lnum 4020 && col >= bot.col + (*p_sel != 'e'))) 4021 status = RA_NOMATCH; 4022 } 4023 else if (mode == Ctrl_V) 4024 { 4025 colnr_T start, end; 4026 colnr_T start2, end2; 4027 colnr_T cols; 4028 4029 getvvcol(wp, &top, &start, NULL, &end); 4030 getvvcol(wp, &bot, &start2, NULL, &end2); 4031 if (start2 < start) 4032 start = start2; 4033 if (end2 > end) 4034 end = end2; 4035 if (top.col == MAXCOL || bot.col == MAXCOL) 4036 end = MAXCOL; 4037 cols = win_linetabsize(wp, 4038 regline, (colnr_T)(reginput - regline)); 4039 if (cols < start || cols > end - (*p_sel == 'e')) 4040 status = RA_NOMATCH; 4041 } 4042 } 4043 #else 4044 status = RA_NOMATCH; 4045 #endif 4046 break; 4047 4048 case RE_LNUM: Line 4441 to 44494441 4442 case RE_VISUAL: 4443 #ifdef FEAT_VISUAL 4444 if (!reg_match_visual()) 4445 #endif 4446 
status = RA_NOMATCH; 4447 break; 4448 4449 case RE_LNUM:
Line 4072 to 40784072 int this_class; 4073 4074 /* Get class of current and previous char (if it exists). */ 4075 this_class = mb_get_class(reginput); 4076 if (this_class <= 1) 4077 status = RA_NOMATCH; /* not on a word at all */ 4078 else if (reg_prev_class() == this_class) Line 4473 to 44794473 int this_class; 4474 4475 /* Get class of current and previous char (if it exists). */ 4476 this_class = mb_get_class_buf(reginput, reg_buf); 4477 if (this_class <= 1) 4478 status = RA_NOMATCH; /* not on a word at all */ 4479 else if (reg_prev_class() == this_class)
Line 4081 to 40884081 #endif 4082 else 4083 { 4084 if (!vim_iswordc(c) 4085 || (reginput > regline && vim_iswordc(reginput[-1]))) 4086 status = RA_NOMATCH; 4087 } 4088 break; Line 4482 to 44894482 #endif 4483 else 4484 { 4485 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline 4486 && vim_iswordc_buf(reginput[-1], reg_buf))) 4487 status = RA_NOMATCH; 4488 } 4489 break;
Line 4096 to 41024096 int this_class, prev_class; 4097 4098 /* Get class of current and previous char (if it exists). */ 4099 this_class = mb_get_class(reginput); 4100 prev_class = reg_prev_class(); 4101 if (this_class == prev_class 4102 || prev_class == 0 || prev_class == 1) Line 4497 to 45034497 int this_class, prev_class; 4498 4499 /* Get class of current and previous char (if it exists). */ 4500 this_class = mb_get_class_buf(reginput, reg_buf); 4501 prev_class = reg_prev_class(); 4502 if (this_class == prev_class 4503 || prev_class == 0 || prev_class == 1)
Line 4105 to 41174105 #endif 4106 else 4107 { 4108 if (!vim_iswordc(reginput[-1]) 4109 || (reginput[0] != NUL && vim_iswordc(c))) 4110 status = RA_NOMATCH; 4111 } 4112 break; /* Matched with EOW */ 4113 4114 case ANY: 4115 if (c == NUL) 4116 status = RA_NOMATCH; 4117 else Line 4506 to 45194506 #endif 4507 else 4508 { 4509 if (!vim_iswordc_buf(reginput[-1], reg_buf) 4510 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) 4511 status = RA_NOMATCH; 4512 } 4513 break; /* Matched with EOW */ 4514 4515 case ANY: 4516 /* ANY does not match new lines. */ 4517 if (c == NUL) 4518 status = RA_NOMATCH; 4519 else
Line 4133 to 41464133 break; 4134 4135 case KWORD: 4136 if (!vim_iswordp(reginput)) 4137 status = RA_NOMATCH; 4138 else 4139 ADVANCE_REGINPUT(); 4140 break; 4141 4142 case SKWORD: 4143 if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput)) 4144 status = RA_NOMATCH; 4145 else 4146 ADVANCE_REGINPUT(); Line 4535 to 45484535 break; 4536 4537 case KWORD: 4538 if (!vim_iswordp_buf(reginput, reg_buf)) 4539 status = RA_NOMATCH; 4540 else 4541 ADVANCE_REGINPUT(); 4542 break; 4543 4544 case SKWORD: 4545 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf)) 4546 status = RA_NOMATCH; 4547 else 4548 ADVANCE_REGINPUT();
Line 4161 to 41744161 break; 4162 4163 case PRINT: 4164 if (ptr2cells(reginput) != 1) 4165 status = RA_NOMATCH; 4166 else 4167 ADVANCE_REGINPUT(); 4168 break; 4169 4170 case SPRINT: 4171 if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1) 4172 status = RA_NOMATCH; 4173 else 4174 ADVANCE_REGINPUT(); Line 4563 to 45764563 break; 4564 4565 case PRINT: 4566 if (!vim_isprintc(PTR2CHAR(reginput))) 4567 status = RA_NOMATCH; 4568 else 4569 ADVANCE_REGINPUT(); 4570 break; 4571 4572 case SPRINT: 4573 if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput))) 4574 status = RA_NOMATCH; 4575 else 4576 ADVANCE_REGINPUT();
Line 4578 to 45864578 case BACKREF + 9: 4579 { 4580 int len; 4581 linenr_T clnum; 4582 colnr_T ccol; 4583 char_u *p; 4584 4585 no = op - BACKREF; 4586 cleanup_subexpr(); Line 4980 to 49854980 case BACKREF + 9: 4981 { 4982 int len; 4983 4984 no = op - BACKREF; 4985 cleanup_subexpr();
Line 4622 to 46884622 { 4623 /* Messy situation: Need to compare between two 4624 * lines. */ 4625 ccol = reg_startpos[no].col; 4626 clnum = reg_startpos[no].lnum; 4627 for (;;) 4628 { 4629 /* Since getting one line may invalidate 4630 * the other, need to make copy. Slow! */ 4631 if (regline != reg_tofree) 4632 { 4633 len = (int)STRLEN(regline); 4634 if (reg_tofree == NULL 4635 || len >= (int)reg_tofreelen) 4636 { 4637 len += 50; /* get some extra */ 4638 vim_free(reg_tofree); 4639 reg_tofree = alloc(len); 4640 if (reg_tofree == NULL) 4641 { 4642 status = RA_FAIL; /* outof memory!*/ 4643 break; 4644 } 4645 reg_tofreelen = len; 4646 } 4647 STRCPY(reg_tofree, regline); 4648 reginput = reg_tofree 4649 + (reginput - regline); 4650 regline = reg_tofree; 4651 } 4652 4653 /* Get the line to compare with. */ 4654 p = reg_getline(clnum); 4655 if (clnum == reg_endpos[no].lnum) 4656 len = reg_endpos[no].col - ccol; 4657 else 4658 len = (int)STRLEN(p + ccol); 4659 4660 if (cstrncmp(p + ccol, reginput, &len) != 0) 4661 { 4662 status = RA_NOMATCH; /* doesn't match */ 4663 break; 4664 } 4665 if (clnum == reg_endpos[no].lnum) 4666 break; /* match and at end! */ 4667 if (reglnum >= reg_maxline) 4668 { 4669 status = RA_NOMATCH; /* text too short */ 4670 break; 4671 } 4672 4673 /* Advance to next line. */ 4674 reg_nextline(); 4675 ++clnum; 4676 ccol = 0; 4677 if (got_int) 4678 { 4679 status = RA_FAIL; 4680 break; 4681 } 4682 } 4683 4684 /* found a match! Note that regline may now point 4685 * to a copy of the line, that should not matter. */ 4686 } 4687 } 4688 } Line 5021 to 50355021 { 5022 /* Messy situation: Need to compare between two 5023 * lines. */ 5024 int r = match_with_backref( 5025 reg_startpos[no].lnum, 5026 reg_startpos[no].col, 5027 reg_endpos[no].lnum, 5028 reg_endpos[no].col, 5029 &len); 5030 5031 if (r != RA_MATCH) 5032 status = r; 5033 } 5034 } 5035 }
Line 5153 to 51595153 /* save the position after the found match for next */ 5154 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); 5155 5156 /* start looking for a match with operand at the current 5157 * position. Go back one character until we find the 5158 * result, hitting the start of the line or the previous 5159 * line (for multi-line matching). Line 5500 to 55065500 /* save the position after the found match for next */ 5501 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); 5502 5503 /* Start looking for a match with operand at the current 5504 * position. Go back one character until we find the 5505 * result, hitting the start of the line or the previous 5506 * line (for multi-line matching).
Line 5165 to 51715165 rp->rs_state = RS_BEHIND2; 5166 5167 reg_restore(&rp->rs_un.regsave, &backpos); 5168 scan = OPERAND(rp->rs_scan); 5169 } 5170 break; 5171 Line 5512 to 55185512 rp->rs_state = RS_BEHIND2; 5513 5514 reg_restore(&rp->rs_un.regsave, &backpos); 5515 scan = OPERAND(rp->rs_scan) + 4; 5516 } 5517 break; 5518
Line 5193 to 52045193 } 5194 else 5195 { 5196 /* No match or a match that doesn't end where we want it: Go 5197 * back one character. May go to previous line once. */ 5198 no = OK; 5199 if (REG_MULTI) 5200 { 5201 if (rp->rs_un.regsave.rs_u.pos.col == 0) 5202 { 5203 if (rp->rs_un.regsave.rs_u.pos.lnum 5204 < behind_pos.rs_u.pos.lnum Line 5540 to 55615540 } 5541 else 5542 { 5543 long limit; 5544 5545 /* No match or a match that doesn't end where we want it: Go 5546 * back one character. May go to previous line once. */ 5547 no = OK; 5548 limit = OPERAND_MIN(rp->rs_scan); 5549 if (REG_MULTI) 5550 { 5551 if (limit > 0 5552 && ((rp->rs_un.regsave.rs_u.pos.lnum 5553 < behind_pos.rs_u.pos.lnum 5554 ? (colnr_T)STRLEN(regline) 5555 : behind_pos.rs_u.pos.col) 5556 - rp->rs_un.regsave.rs_u.pos.col >= limit)) 5557 no = FAIL; 5558 else if (rp->rs_un.regsave.rs_u.pos.col == 0) 5559 { 5560 if (rp->rs_un.regsave.rs_u.pos.lnum 5561 < behind_pos.rs_u.pos.lnum
Line 5214 to 52335214 } 5215 } 5216 else 5217 --rp->rs_un.regsave.rs_u.pos.col; 5218 } 5219 else 5220 { 5221 if (rp->rs_un.regsave.rs_u.ptr == regline) 5222 no = FAIL; 5223 else 5224 --rp->rs_un.regsave.rs_u.ptr; 5225 } 5226 if (no == OK) 5227 { 5228 /* Advanced, prepare for finding match again. */ 5229 reg_restore(&rp->rs_un.regsave, &backpos); 5230 scan = OPERAND(rp->rs_scan); 5231 if (status == RA_MATCH) 5232 { 5233 /* We did match, so subexpr may have been changed, Line 5571 to 56045571 } 5572 } 5573 else 5574 { 5575 #ifdef FEAT_MBYTE 5576 if (has_mbyte) 5577 rp->rs_un.regsave.rs_u.pos.col -= 5578 (*mb_head_off)(regline, regline 5579 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; 5580 else 5581 #endif 5582 --rp->rs_un.regsave.rs_u.pos.col; 5583 } 5584 } 5585 else 5586 { 5587 if (rp->rs_un.regsave.rs_u.ptr == regline) 5588 no = FAIL; 5589 else 5590 { 5591 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr); 5592 if (limit > 0 && (long)(behind_pos.rs_u.ptr 5593 - rp->rs_un.regsave.rs_u.ptr) > limit) 5594 no = FAIL; 5595 } 5596 } 5597 if (no == OK) 5598 { 5599 /* Advanced, prepare for finding match again. */ 5600 reg_restore(&rp->rs_un.regsave, &backpos); 5601 scan = OPERAND(rp->rs_scan) + 4; 5602 if (status == RA_MATCH) 5603 { 5604 /* We did match, so subexpr may have been changed,
Line 5472 to 54785472 case SIDENT + ADD_NL: 5473 while (count < maxcount) 5474 { 5475 if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan))) 5476 { 5477 mb_ptr_adv(scan); 5478 } Line 5843 to 58495843 case SIDENT + ADD_NL: 5844 while (count < maxcount) 5845 { 5846 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan))) 5847 { 5848 mb_ptr_adv(scan); 5849 }
Line 5502 to 55085502 case SKWORD + ADD_NL: 5503 while (count < maxcount) 5504 { 5505 if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan))) 5506 { 5507 mb_ptr_adv(scan); 5508 } Line 5873 to 58805873 case SKWORD + ADD_NL: 5874 while (count < maxcount) 5875 { 5876 if (vim_iswordp_buf(scan, reg_buf) 5877 && (testval || !VIM_ISDIGIT(*scan))) 5878 { 5879 mb_ptr_adv(scan); 5880 }
Line 5532 to 55385532 case SFNAME + ADD_NL: 5533 while (count < maxcount) 5534 { 5535 if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan))) 5536 { 5537 mb_ptr_adv(scan); 5538 } Line 5904 to 59105904 case SFNAME + ADD_NL: 5905 while (count < maxcount) 5906 { 5907 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan))) 5908 { 5909 mb_ptr_adv(scan); 5910 }
Line 5572 to 55785572 if (got_int) 5573 break; 5574 } 5575 else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan))) 5576 { 5577 mb_ptr_adv(scan); 5578 } Line 5944 to 59515944 if (got_int) 5945 break; 5946 } 5947 else if (vim_isprintc(PTR2CHAR(scan)) == 1 5948 && (testval || !VIM_ISDIGIT(*scan))) 5949 { 5950 mb_ptr_adv(scan); 5951 }
Line 5848 to 58565848 static int 5849 prog_magic_wrong() 5850 { 5851 if (UCHARAT(REG_MULTI 5852 ? reg_mmatch->regprog->program 5853 : reg_match->regprog->program) != REGMAGIC) 5854 { 5855 EMSG(_(e_re_corr)); 5856 return TRUE; Line 6221 to 62346221 static int 6222 prog_magic_wrong() 6223 { 6224 regprog_T *prog; 6225 6226 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog; 6227 if (prog->engine == &nfa_regengine) 6228 /* For NFA matcher we don't check the magic */ 6229 return FALSE; 6230 6231 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC) 6232 { 6233 EMSG(_(e_re_corr)); 6234 return TRUE;
Line 6074 to 60816074 return val == n; 6075 } 6076 6077 6078 #ifdef DEBUG 6079 6080 /* 6081 * regdump - dump a regexp onto stdout in vaguely comprehensible form Line 6452 to 65286452 return val == n; 6453 } 6454 6455 /* 6456 * Check whether a backreference matches. 6457 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH. 6458 * If "bytelen" is not NULL, it is set to the bytelength of the whole match. 6459 */ 6460 static int 6461 match_with_backref(start_lnum, start_col, end_lnum, end_col, bytelen) 6462 linenr_T start_lnum; 6463 colnr_T start_col; 6464 linenr_T end_lnum; 6465 colnr_T end_col; 6466 int *bytelen; 6467 { 6468 linenr_T clnum = start_lnum; 6469 colnr_T ccol = start_col; 6470 int len; 6471 char_u *p; 6472 6473 if (bytelen != NULL) 6474 *bytelen = 0; 6475 for (;;) 6476 { 6477 /* Since getting one line may invalidate the other, need to make copy. 6478 * Slow! */ 6479 if (regline != reg_tofree) 6480 { 6481 len = (int)STRLEN(regline); 6482 if (reg_tofree == NULL || len >= (int)reg_tofreelen) 6483 { 6484 len += 50; /* get some extra */ 6485 vim_free(reg_tofree); 6486 reg_tofree = alloc(len); 6487 if (reg_tofree == NULL) 6488 return RA_FAIL; /* out of memory!*/ 6489 reg_tofreelen = len; 6490 } 6491 STRCPY(reg_tofree, regline); 6492 reginput = reg_tofree + (reginput - regline); 6493 regline = reg_tofree; 6494 } 6495 6496 /* Get the line to compare with. */ 6497 p = reg_getline(clnum); 6498 if (clnum == end_lnum) 6499 len = end_col - ccol; 6500 else 6501 len = (int)STRLEN(p + ccol); 6502 6503 if (cstrncmp(p + ccol, reginput, &len) != 0) 6504 return RA_NOMATCH; /* doesn't match */ 6505 if (bytelen != NULL) 6506 *bytelen += len; 6507 if (clnum == end_lnum) 6508 break; /* match and at end! */ 6509 if (reglnum >= reg_maxline) 6510 return RA_NOMATCH; /* text too short */ 6511 6512 /* Advance to next line. */ 6513 reg_nextline(); 6514 ++clnum; 6515 ccol = 0; 6516 if (got_int) 6517 return RA_FAIL; 6518 } 6519 6520 /* found a match! 
Note that regline may now point to a copy of the line, 6521 * that should not matter. */ 6522 return RA_MATCH; 6523 } 6524 6525 #ifdef BT_REGEXP_DUMP 6526 6527 /* 6528 * regdump - dump a regexp onto stdout in vaguely comprehensible form
Line 6083 to 60966083 static void 6084 regdump(pattern, r) 6085 char_u *pattern; 6086 regprog_T *r; 6087 { 6088 char_u *s; 6089 int op = EXACTLY; /* Arbitrary non-END op. */ 6090 char_u *next; 6091 char_u *end = NULL; 6092 6093 printf("\r\nregcomp(%s):\r\n", pattern); 6094 6095 s = r->program + 1; 6096 /* Line 6530 to 65516530 static void 6531 regdump(pattern, r) 6532 char_u *pattern; 6533 bt_regprog_T *r; 6534 { 6535 char_u *s; 6536 int op = EXACTLY; /* Arbitrary non-END op. */ 6537 char_u *next; 6538 char_u *end = NULL; 6539 FILE *f; 6540 6541 #ifdef BT_REGEXP_LOG 6542 f = fopen("bt_regexp_log.log", "a"); 6543 #else 6544 f = stdout; 6545 #endif 6546 if (f == NULL) 6547 return; 6548 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern); 6549 6550 s = r->program + 1; 6551 /*
Line 6100 to 61446100 while (op != END || s <= end) 6101 { 6102 op = OP(s); 6103 printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */ 6104 next = regnext(s); 6105 if (next == NULL) /* Next ptr. */ 6106 printf("(0)"); 6107 else 6108 printf("(%d)", (int)((s - r->program) + (next - s))); 6109 if (end < next) 6110 end = next; 6111 if (op == BRACE_LIMITS) 6112 { 6113 /* Two short ints */ 6114 printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s)); 6115 s += 8; 6116 } 6117 s += 3; 6118 if (op == ANYOF || op == ANYOF + ADD_NL 6119 || op == ANYBUT || op == ANYBUT + ADD_NL 6120 || op == EXACTLY) 6121 { 6122 /* Literal string, where present. */ 6123 while (*s != NUL) 6124 printf("%c", *s++); 6125 s++; 6126 } 6127 printf("\r\n"); 6128 } 6129 6130 /* Header fields of interest. */ 6131 if (r->regstart != NUL) 6132 printf("start `%s' 0x%x; ", r->regstart < 256 6133 ? (char *)transchar(r->regstart) 6134 : "multibyte", r->regstart); 6135 if (r->reganch) 6136 printf("anchored; "); 6137 if (r->regmust != NULL) 6138 printf("must have \"%s\"", r->regmust); 6139 printf("\r\n"); 6140 } 6141 6142 /* 6143 * regprop - printable representation of opcode 6144 */ Line 6555 to 66196555 while (op != END || s <= end) 6556 { 6557 op = OP(s); 6558 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */ 6559 next = regnext(s); 6560 if (next == NULL) /* Next ptr. 
*/ 6561 fprintf(f, "(0)"); 6562 else 6563 fprintf(f, "(%d)", (int)((s - r->program) + (next - s))); 6564 if (end < next) 6565 end = next; 6566 if (op == BRACE_LIMITS) 6567 { 6568 /* Two ints */ 6569 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s)); 6570 s += 8; 6571 } 6572 else if (op == BEHIND || op == NOBEHIND) 6573 { 6574 /* one int */ 6575 fprintf(f, " count %ld", OPERAND_MIN(s)); 6576 s += 4; 6577 } 6578 else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL) 6579 { 6580 /* one int plus comperator */ 6581 fprintf(f, " count %ld", OPERAND_MIN(s)); 6582 s += 5; 6583 } 6584 s += 3; 6585 if (op == ANYOF || op == ANYOF + ADD_NL 6586 || op == ANYBUT || op == ANYBUT + ADD_NL 6587 || op == EXACTLY) 6588 { 6589 /* Literal string, where present. */ 6590 fprintf(f, "\nxxxxxxxxx\n"); 6591 while (*s != NUL) 6592 fprintf(f, "%c", *s++); 6593 fprintf(f, "\nxxxxxxxxx\n"); 6594 s++; 6595 } 6596 fprintf(f, "\r\n"); 6597 } 6598 6599 /* Header fields of interest. */ 6600 if (r->regstart != NUL) 6601 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256 6602 ? (char *)transchar(r->regstart) 6603 : "multibyte", r->regstart); 6604 if (r->reganch) 6605 fprintf(f, "anchored; "); 6606 if (r->regmust != NULL) 6607 fprintf(f, "must have \"%s\"", r->regmust); 6608 fprintf(f, "\r\n"); 6609 6610 #ifdef BT_REGEXP_LOG 6611 fclose(f); 6612 #endif 6613 } 6614 #endif /* BT_REGEXP_DUMP */ 6615 6616 #ifdef DEBUG 6617 /* 6618 * regprop - printable representation of opcode 6619 */
Line 6146 to 61576146 regprop(op) 6147 char_u *op; 6148 { 6149 char_u *p; 6150 static char_u buf[50]; 6151 6152 (void) strcpy(buf, ":"); 6153 6154 switch (OP(op)) 6155 { 6156 case BOL: 6157 p = "BOL"; Line 6621 to 66326621 regprop(op) 6622 char_u *op; 6623 { 6624 char *p; 6625 static char buf[50]; 6626 6627 STRCPY(buf, ":"); 6628 6629 switch ((int) OP(op)) 6630 { 6631 case BOL: 6632 p = "BOL";
Line 6518 to 65276518 break; 6519 } 6520 if (p != NULL) 6521 (void) strcat(buf, p); 6522 return buf; 6523 } 6524 #endif 6525 6526 #ifdef FEAT_MBYTE 6527 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3)); Line 6993 to 70026993 break; 6994 } 6995 if (p != NULL) 6996 STRCAT(buf, p); 6997 return (char_u *)buf; 6998 } 6999 #endif /* DEBUG */ 7000 7001 #ifdef FEAT_MBYTE 7002 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
Line 6591 to 65976591 { 6592 decomp_T d; 6593 6594 if (c >= 0x4b20 && c <= 0xfb4f) 6595 { 6596 d = decomp_table[c - 0xfb20]; 6597 *c1 = d.a; Line 7066 to 70727066 { 7067 decomp_T d; 7068 7069 if (c >= 0xfb20 && c <= 0xfb4f) 7070 { 7071 d = decomp_table[c - 0xfb20]; 7072 *c1 = d.a;
Line 6871 to 68766871 static regmmatch_T *submatch_mmatch; 6872 static linenr_T submatch_firstlnum; 6873 static linenr_T submatch_maxline; 6874 #endif 6875 6876 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO) Line 7346 to 73527346 static regmmatch_T *submatch_mmatch; 7347 static linenr_T submatch_firstlnum; 7348 static linenr_T submatch_maxline; 7349 static int submatch_line_lbr; 7350 #endif 7351 7352 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
Line 6904 to 69096904 reg_match = rmp; 6905 reg_mmatch = NULL; 6906 reg_maxline = 0; 6907 return vim_regsub_both(source, dest, copy, magic, backslash); 6908 } 6909 #endif Line 7380 to 73867380 reg_match = rmp; 7381 reg_mmatch = NULL; 7382 reg_maxline = 0; 7383 reg_buf = curbuf; 7384 return vim_regsub_both(source, dest, copy, magic, backslash); 7385 } 7386 #endif
Line 6940 to 69466940 int c; 6941 int cc; 6942 int no = -1; 6943 fptr_T func = (fptr_T)NULL; 6944 linenr_T clnum = 0; /* init for GCC */ 6945 int len = 0; /* init for GCC */ 6946 #ifdef FEAT_EVAL Line 7417 to 74247417 int c; 7418 int cc; 7419 int no = -1; 7420 fptr_T func_all = (fptr_T)NULL; 7421 fptr_T func_one = (fptr_T)NULL; 7422 linenr_T clnum = 0; /* init for GCC */ 7423 int len = 0; /* init for GCC */ 7424 #ifdef FEAT_EVAL
Line 6997 to 70026997 submatch_mmatch = reg_mmatch; 6998 submatch_firstlnum = reg_firstlnum; 6999 submatch_maxline = reg_maxline; 7000 save_reg_win = reg_win; 7001 save_ireg_ic = ireg_ic; 7002 can_f_submatch = TRUE; Line 7475 to 74817475 submatch_mmatch = reg_mmatch; 7476 submatch_firstlnum = reg_firstlnum; 7477 submatch_maxline = reg_maxline; 7478 submatch_line_lbr = reg_line_lbr; 7479 save_reg_win = reg_win; 7480 save_ireg_ic = ireg_ic; 7481 can_f_submatch = TRUE;
Line 7008 to 70167008 7009 for (s = eval_result; *s != NUL; mb_ptr_adv(s)) 7010 { 7011 /* Change NL to CR, so that it becomes a line break. 7012 * Skip over a backslashed character. */ 7013 if (*s == NL) 7014 *s = CAR; 7015 else if (*s == '\\' && s[1] != NUL) 7016 { Line 7487 to 74967487 7488 for (s = eval_result; *s != NUL; mb_ptr_adv(s)) 7489 { 7490 /* Change NL to CR, so that it becomes a line break, 7491 * unless called from vim_regexec_nl(). 7492 * Skip over a backslashed character. */ 7493 if (*s == NL && !submatch_line_lbr) 7494 *s = CAR; 7495 else if (*s == '\\' && s[1] != NUL) 7496 {
Line 7019 to 70267019 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: 7020 * abc\ 7021 * def 7022 */ 7023 if (*s == NL) 7024 *s = CAR; 7025 had_backslash = TRUE; 7026 } Line 7499 to 75077499 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: 7500 * abc\ 7501 * def 7502 * Not when called from vim_regexec_nl(). 7503 */ 7504 if (*s == NL && !submatch_line_lbr) 7505 *s = CAR; 7506 had_backslash = TRUE; 7507 }
Line 7043 to 70487043 reg_mmatch = submatch_mmatch; 7044 reg_firstlnum = submatch_firstlnum; 7045 reg_maxline = submatch_maxline; 7046 reg_win = save_reg_win; 7047 ireg_ic = save_ireg_ic; 7048 can_f_submatch = FALSE; Line 7524 to 75307524 reg_mmatch = submatch_mmatch; 7525 reg_firstlnum = submatch_firstlnum; 7526 reg_maxline = submatch_maxline; 7527 reg_line_lbr = submatch_line_lbr; 7528 reg_win = save_reg_win; 7529 ireg_ic = save_ireg_ic; 7530 can_f_submatch = FALSE;
Line 7069 to 70847069 { 7070 switch (*src++) 7071 { 7072 case 'u': func = (fptr_T)do_upper; 7073 continue; 7074 case 'U': func = (fptr_T)do_Upper; 7075 continue; 7076 case 'l': func = (fptr_T)do_lower; 7077 continue; 7078 case 'L': func = (fptr_T)do_Lower; 7079 continue; 7080 case 'e': 7081 case 'E': func = (fptr_T)NULL; 7082 continue; 7083 } 7084 } Line 7551 to 75667551 { 7552 switch (*src++) 7553 { 7554 case 'u': func_one = (fptr_T)do_upper; 7555 continue; 7556 case 'U': func_all = (fptr_T)do_Upper; 7557 continue; 7558 case 'l': func_one = (fptr_T)do_lower; 7559 continue; 7560 case 'L': func_all = (fptr_T)do_Lower; 7561 continue; 7562 case 'e': 7563 case 'E': func_one = func_all = (fptr_T)NULL; 7564 continue; 7565 } 7566 }
Line 7131 to 71417131 #endif 7132 7133 /* Write to buffer, if copy is set. */ 7134 if (func == (fptr_T)NULL) /* just copy */ 7135 cc = c; 7136 else 7137 /* Turbo C complains without the typecast */ 7138 func = (fptr_T)(func(&cc, c)); 7139 7140 #ifdef FEAT_MBYTE 7141 if (has_mbyte) Line 7613 to 76267613 #endif 7614 7615 /* Write to buffer, if copy is set. */ 7616 if (func_one != (fptr_T)NULL) 7617 /* Turbo C complains without the typecast */ 7618 func_one = (fptr_T)(func_one(&cc, c)); 7619 else if (func_all != (fptr_T)NULL) 7620 /* Turbo C complains without the typecast */ 7621 func_all = (fptr_T)(func_all(&cc, c)); 7622 else /* just copy */ 7623 cc = c; 7624 7625 #ifdef FEAT_MBYTE 7626 if (has_mbyte)
Line 7246 to 72567246 #endif 7247 c = *s; 7248 7249 if (func == (fptr_T)NULL) /* just copy */ 7250 cc = c; 7251 else 7252 /* Turbo C complains without the typecast */ 7253 func = (fptr_T)(func(&cc, c)); 7254 7255 #ifdef FEAT_MBYTE 7256 if (has_mbyte) Line 7731 to 77447731 #endif 7732 c = *s; 7733 7734 if (func_one != (fptr_T)NULL) 7735 /* Turbo C complains without the typecast */ 7736 func_one = (fptr_T)(func_one(&cc, c)); 7737 else if (func_all != (fptr_T)NULL) 7738 /* Turbo C complains without the typecast */ 7739 func_all = (fptr_T)(func_all(&cc, c)); 7740 else /* just copy */ 7741 cc = c; 7742 7743 #ifdef FEAT_MBYTE 7744 if (has_mbyte)
Line 7411 to 74137411 return retval; 7412 } 7413 #endif Line 7899 to 80907899 return retval; 7900 } 7901 #endif 7902 7903 static regengine_T bt_regengine = 7904 { 7905 bt_regcomp, 7906 bt_regfree, 7907 bt_regexec, 7908 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ 7909 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) 7910 bt_regexec_nl, 7911 #endif 7912 bt_regexec_multi 7913 #ifdef DEBUG 7914 ,(char_u *)"" 7915 #endif 7916 }; 7917 7918 7919 #include "regexp_nfa.c" 7920 7921 static regengine_T nfa_regengine = 7922 { 7923 nfa_regcomp, 7924 nfa_regfree, 7925 nfa_regexec, 7926 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ 7927 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) 7928 nfa_regexec_nl, 7929 #endif 7930 nfa_regexec_multi 7931 #ifdef DEBUG 7932 ,(char_u *)"" 7933 #endif 7934 }; 7935 7936 /* Which regexp engine to use? Needed for vim_regcomp(). 7937 * Must match with 'regexpengine'. */ 7938 static int regexp_engine = 0; 7939 #define AUTOMATIC_ENGINE 0 7940 #define BACKTRACKING_ENGINE 1 7941 #define NFA_ENGINE 2 7942 #ifdef DEBUG 7943 static char_u regname[][30] = { 7944 "AUTOMATIC Regexp Engine", 7945 "BACKTRACKING Regexp Engine", 7946 "NFA Regexp Engine" 7947 }; 7948 #endif 7949 7950 /* 7951 * Compile a regular expression into internal code. 7952 * Returns the program in allocated memory. 7953 * Use vim_regfree() to free the memory. 7954 * Returns NULL for an error. 
7955 */ 7956 regprog_T * 7957 vim_regcomp(expr_arg, re_flags) 7958 char_u *expr_arg; 7959 int re_flags; 7960 { 7961 regprog_T *prog = NULL; 7962 char_u *expr = expr_arg; 7963 7964 regexp_engine = p_re; 7965 7966 /* Check for prefix "\%#=", that sets the regexp engine */ 7967 if (STRNCMP(expr, "\\%#=", 4) == 0) 7968 { 7969 int newengine = expr[4] - '0'; 7970 7971 if (newengine == AUTOMATIC_ENGINE 7972 || newengine == BACKTRACKING_ENGINE 7973 || newengine == NFA_ENGINE) 7974 { 7975 regexp_engine = expr[4] - '0'; 7976 expr += 5; 7977 #ifdef DEBUG 7978 EMSG3("New regexp mode selected (%d): %s", regexp_engine, 7979 regname[newengine]); 7980 #endif 7981 } 7982 else 7983 { 7984 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used ")); 7985 regexp_engine = AUTOMATIC_ENGINE; 7986 } 7987 } 7988 #ifdef DEBUG 7989 bt_regengine.expr = expr; 7990 nfa_regengine.expr = expr; 7991 #endif 7992 7993 /* 7994 * First try the NFA engine, unless backtracking was requested. 7995 */ 7996 if (regexp_engine != BACKTRACKING_ENGINE) 7997 prog = nfa_regengine.regcomp(expr, re_flags); 7998 else 7999 prog = bt_regengine.regcomp(expr, re_flags); 8000 8001 if (prog == NULL) /* error compiling regexp with initial engine */ 8002 { 8003 #ifdef BT_REGEXP_DEBUG_LOG 8004 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */ 8005 { 8006 FILE *f; 8007 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); 8008 if (f) 8009 { 8010 fprintf(f, "Syntax error in \"%s\"\n", expr); 8011 fclose(f); 8012 } 8013 else 8014 EMSG2("(NFA) Could not open \"%s\" to write !!!", 8015 BT_REGEXP_DEBUG_LOG_NAME); 8016 } 8017 #endif 8018 /* 8019 * If NFA engine failed, then revert to the backtracking engine. 8020 * Except when there was a syntax error, which was properly handled by 8021 * NFA engine. 
8022 */ 8023 if (regexp_engine == AUTOMATIC_ENGINE) 8024 prog = bt_regengine.regcomp(expr, re_flags); 8025 } 8026 8027 return prog; 8028 } 8029 8030 /* 8031 * Free a compiled regexp program, returned by vim_regcomp(). 8032 */ 8033 void 8034 vim_regfree(prog) 8035 regprog_T *prog; 8036 { 8037 if (prog != NULL) 8038 prog->engine->regfree(prog); 8039 } 8040 8041 /* 8042 * Match a regexp against a string. 8043 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). 8044 * Uses curbuf for line count and 'iskeyword'. 8045 * 8046 * Return TRUE if there is a match, FALSE if not. 8047 */ 8048 int 8049 vim_regexec(rmp, line, col) 8050 regmatch_T *rmp; 8051 char_u *line; /* string to match against */ 8052 colnr_T col; /* column to start looking for match */ 8053 { 8054 return rmp->regprog->engine->regexec(rmp, line, col); 8055 } 8056 8057 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ 8058 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) 8059 /* 8060 * Like vim_regexec(), but consider a "\n" in "line" to be a line break. 8061 */ 8062 int 8063 vim_regexec_nl(rmp, line, col) 8064 regmatch_T *rmp; 8065 char_u *line; 8066 colnr_T col; 8067 { 8068 return rmp->regprog->engine->regexec_nl(rmp, line, col); 8069 } 8070 #endif 8071 8072 /* 8073 * Match a regexp against multiple lines. 8074 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). 8075 * Uses curbuf for line count and 'iskeyword'. 8076 * 8077 * Return zero if there is no match. Return number of lines contained in the 8078 * match otherwise. 
8079 */ 8080 long 8081 vim_regexec_multi(rmp, win, buf, lnum, col, tm) 8082 regmmatch_T *rmp; 8083 win_T *win; /* window in which to search or NULL */ 8084 buf_T *buf; /* buffer in which to search */ 8085 linenr_T lnum; /* nr of line to start looking for match */ 8086 colnr_T col; /* column to start looking for match */ 8087 proftime_T *tm; /* timeout limit or NULL */ 8088 { 8089 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); 8090 }

Generated on Tue, 29 Apr 2014 16:58:40 EST

Legend:
filename 1filename 2
Line 1 to 3 1 line deleted 2 no change 3 line modified Line 1 to 3 1 line added 2 no change 3 Line Modified