User Tools

Site Tools


c:memmem

This is an old revision of the document!


Here's an implementation of the GNU function memmem that appears to be much faster (2x) than the standard memmem included on my Debian system for my workload.

#include <string.h>
#include <stdint.h>
 
// Implements the GNU function memmem much faster (2x) than the standard memmem included on my Debian system
char* memmem(char* haystack, int hlen, char* needle, int nlen) {
	if (nlen > hlen) return 0;
	int i,j;
	switch(nlen) { // we have a few specialized compares for certain needle sizes
	case 0: // no needle? just give the haystack
		return haystack;
	case 1: // just use memchr for 1-byte needle
		return memchr(haystack, needle[0], hlen);
	case 2: // use 16-bit compares for 2-byte needles
		for (i=0; i<hlen-nlen+1; i++) {
			if (*(uint16_t*)(haystack+i)==*(uint16_t*)needle) {
				return haystack+i;
			}
		}
		break;
	case 4: // use 32-bit compares for 4-byte needles
		for (i=0; i<hlen-nlen+1; i++) {
			if (*(uint32_t*)(haystack+i)==*(uint32_t*)needle) {
				return haystack+i;
			}
		}
		break;
	/* actually slower on my 32-bit machine
	case 8: // use 64-bit compares for 8-byte needles
		for (i=0; i<hlen-nlen+1; i++) {
			if (*(uint64_t*)(haystack+i)==*(uint64_t*)needle) {
				return haystack+i;
			}
		}
		break;
	*/
	default: // generic compare for any other needle size
		// walk i through the haystack, matching j as long as needle[j] matches haystack[i]
		for (i=0; i<hlen-nlen+1; i++) {
			if (haystack[i]==needle[j]) {
				if (j==nlen-1) { // end of needle and it all matched?  win.
					return haystack+i-j;
				} else { // keep advancing j (and i, implicitly)
					j++;
				}
			} else { // no match, rewind i the length of the failed match (j), and reset j
				i-=j;
				j=0;
			}
		}
	}
	return NULL;
}

Also, here's a really dumb implementation for no reason.

char* DUMB_memmem(char* haystack, int hlen, char* needle, int nlen) {
	// naive implementation
	if (nlen > hlen) return 0;
	int i;
	for (i=0; i<hlen-nlen+1; i++) {
		if (memcmp(haystack+i,needle,nlen)==0) {
			return haystack+i;
		}
	}
	return NULL;
}
c/memmem.1294261309.txt.gz · Last modified: 2011/01/05 13:01 by tkbletsc

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki