C语言实现围棋SGF格式棋谱解析器

完整源码：
链接: https://pan.baidu.com/s/1MglwXjH-59Nbz2ntWfGmvQ 提取码: hytu

　　这是本人（liigo）独立实现的SGF格式围棋棋谱文件解析器，本文介绍其实现细节。网络上肯定可以找到完善的开源的SGF解析器，这是毋庸置疑的，我不直接使用它们，也不参考它们的实现代码，而是自己独立编码实现，是有原因的，因为我想自己重复发明轮子，并且认为这样更有助于提高我的编码能力。（关于我的“一定要学会重复发明轮子”的不成熟的论调，今后我将会专门撰文表述。）

　　我（liigo）开发的这个SGF解析器，采用基于事件的简单API，类似于XML解析器中的SAX(Simple API for XML)。这种解析器的核心是：由用户事先提供一系列回调函数，解析器在解析的过程中，依次调用相关的回调函数并传入相应参数，用户程序在回调函数中做出相应的处理。此类解析器属于轻量级的解析器，解析速度快，占用内存少，结构清晰易于实现，只是相对来说不如基于DOM的解析器方便使用。

　　SGF格式，Smart Game Format，被设计用来记录多种游戏类棋谱的通用格式，在围棋领域被发扬光大，是用于描述围棋棋谱的最重要也最通用的形式。它是纯文本的、基于树（TREE）的结构，便于识别、存储和传输。其格式简洁实用，也非常易于编程解析。SGF格式官方规范网址为：http://www.red-bean.com/sgf/。（说到围棋棋谱，不得不赞叹一下，它只需用一幅图就可以完整还原一盘棋从始至终的风云变幻；作为对比，象棋一幅图只能描述对弈中某一时刻的场景。）

　　SGF的主要结构由树（GameTree）、节点序列（Sequence）、节点（Node）、属性（Property）等组成。其中“属性”为最重要的基本单位，它由属性标识（PropIdent）和属性值（PropValue）组成。由分号“;”分隔的多个属性，称为节点。多个节点顺序排列称为节点序列。由括号“(”“)”括起来的节点序列，称为树，树中可包含子树。SGF的EBNF定义如下
（参见http://www.red-bean.com/sgf/sgf4.html#ebnf-def）：

Collection = GameTree { GameTree }
GameTree   = "(" Sequence { GameTree } ")"
Sequence   = Node { Node }
Node       = ";" { Property }
Property   = PropIdent PropValue { PropValue }
PropIdent  = UcLetter { UcLetter }
PropValue  = "[" CValueType "]"
CValueType = (ValueType | Compose)
ValueType  = (None | Number | Real | Double | Color | SimpleText | Text | Point  | Move | Stone)

以下是一个简单的有一定代表性的SGF文本，先让大家有一个感性认识：

(;FF[4]GM[1]SZ[19]FG[257:Figure 1]PM[1]
PB[Takemiya Masaki]BR[9 dan]PW[Cho Chikun]
WR[9 dan]RE[W+Resign]KM[5.5]TM[28800]DT[1996-10-18,19]
EV[21st Meijin]RO[2 (final)]SO[Go World #78]US[Arno Hollosi]
;B[pd];W[dp];B[pp];W[dd];B[pj];W[nc];B[oe];W[qc];B[pc];W[qd]
(;B[qf];W[rf];B[rg];W[re];B[qg];W[pb];B[ob];W[qb]
(;B[mp];W[fq];B[ci];W[cg];B[dl];W[cn];B[qo];W[ec];B[jp];W[jd]
;B[ei];W[eg];B[kk]LB[qq:a][dj:b][ck:c][qp:d]N[Figure 1]

;W[me]FG[257:Figure 2];B[kf];W[ke];B[lf];W[jf];B[jg]
(;W[mf];B[if];W[je];B[ig];W[mg];B[mj];W[mq];B[lq];W[nq]
(;B[lr];W[qq];B[pq];W[pr];B[rq];W[rr];B[rp];W[oq];B[mr];W[oo];B[mn]
(;W[nr];B[qp]LB[kd:a][kh:b]N[Figure 2]

;W[pk]FG[257:Figure 3];B[pm];W[oj];B[ok];W[qr];B[os];W[ol];B[nk];W[qj]
;B[pi];W[pl];B[qm];W[ns];B[sr];W[om];B[op];W[qi];B[oi]
(;W[rl];B[qh];W[rm];B[rn];W[ri];B[ql];W[qk];B[sm];W[sk];B[sh];W[og]
;B[oh];W[np];B[no];W[mm];B[nn];W[lp];B[kp];W[lo];B[ln];W[ko];B[mo]
;W[jo];B[km]N[Figure 3])

(;W[ql]VW[ja:ss]FG[257:Dia. 6]MN[1];B[rm];W[ph];B[oh];W[pg];B[og];W[pf]
;B[qh];W[qe];B[sh];W[of];B[sj]TR[oe][pd][pc][ob]LB[pe:a][sg:b][si:c]
N[Diagram 6]))

(;W[no]VW[jj:ss]FG[257:Dia. 5]MN[1];B[pn]N[Diagram 5]))

(;B[pr]FG[257:Dia. 4]MN[1];W[kq];B[lp];W[lr];B[jq];W[jr];B[kp];W[kr];B[ir]
;W[hr]LB[is:a][js:b][or:c]N[Diagram 4]))

(;W[if]FG[257:Dia. 3]MN[1];B[mf];W[ig];B[jh]LB[ki:a]N[Diagram 3]))

(;W[oc]VW[aa:sk]FG[257:Dia. 2]MN[1];B[md];W[mc];B[ld]N[Diagram 2]))

(;B[qe]VW[aa:sj]FG[257:Dia. 1]MN[1];W[re];B[qf];W[rf];B[qg];W[pb];B[ob]
;W[qb]LB[rg:a]N[Diagram 1]))

　熟悉编写文本解析器的程序员朋友应该都清楚，根据EBNF定义，编写对应的解析器，是相当简单和直观的，貌似只是一项翻译性的工作。本人实现SGF解析器，再次印证了这个观点，大部分情况下，我只是按部就班地将EBNF翻译为C语言代码而已，呵呵。

　　我首先设计了“SGFParseContext”结构，用于保存解析器工作期间的相关数据：

typedef struct _tagSGFParseContext
{
        void* pUserData;
        int treeIndex;

        PFN_ON_TREE pfnOnTree;
        PFN_ON_TREE_END pfnOnTreeEnd;
        PFN_ON_NODE pfnOnNode;
        PFN_ON_NODE_END pfnOnNodeEnd;
        PFN_ON_PROPERTY pfnOnProperty;

        char idBuffer[16];
        char* valueBuffer;
        int valueBufferSize;
}
SGFParseContext;

相应的还有初始化和清理SGFParseContext结构的函数，initSGFParseContext, cleanupSGFParseContext，皆不是本解析器的关键，略过不提。

　　接着我(liigo)设计了五个回调函数的函数原形：

typedef void (*PFN_ON_TREE) (SGFParseContext* pContext, const char* szTreeHeader, int treeIndex);
typedef void (*PFN_ON_TREE_END) (SGFParseContext* pContext, int treeIndex);
typedef void (*PFN_ON_NODE) (SGFParseContext* pContext, const char* szNodeHeader);
typedef void (*PFN_ON_NODE_END) (SGFParseContext* pContext);
typedef void (*PFN_ON_PROPERTY) (SGFParseContext* pContext, const char* szID, const char* szValue);

这五个回调函数，将分别在解析器解析到“树开始”“树结束”“节点开始”“节点结束”“遇到属性”时，由解析器调用。解析器调用每个回调函数时，都会传入必需的参数，供回调函数即时取用。

　　下面正式开始解析工作。整个解析器被分为 parseProperty, parseNode, parseNodeSequence, parseGameTree, parseSGF 几大部分顺序解析，属于至底向上的分析实现模式。这几大部分，也分别对应着SGF的EBNF定义中的某一项。所有解析函数都接收参数 const char* szCollection, int fromPos，之前的解析函数将决定后续解析函数的起始解析位置。

　　第一步，解析属性（parseProperty）。此处关键的是要定位到属性值（szValue）开始和结束符号“[”和“]，两者之间的是属性值，“[”之前的则是属性标识（szID）。由于[和]之间可能存在转义字符“/”，不能简单地搜索字符“]”，必须花相当篇幅的代码处理转义字符（我用局部变量in_escape记录转义状态并进行分别处理）。此外要为提取出的属性标识和属性值分配足够的存储空间，以便传递到用户回调函数，前者不会太长使用静态分配，后者变长则使用动态分配（同时自动预分配存储空间，缓存，避免频繁申请内存）。代码如下：

//Property: id[value]
int parseProperty(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
        const char* szFromPos;
        int lindex;
        int nIDBufferSize = sizeof(pContext->idBuffer) - 1;
        assert(szCollection && fromPos >= 0);
        szFromPos = szCollection + fromPos;

        lindex = findchar(szFromPos, -1, '[');
        assert(lindex > 0 && lindex < nIDBufferSize);
        if(lindex > 0 && lindex < nIDBufferSize)
        {
                memcpy(pContext->idBuffer, szFromPos, lindex);
                pContext->idBuffer[lindex] = '/0';

                if(isTextPropertyID(pContext->idBuffer))
                {
                        //parse the text or simple-text value, consider the '/' escape character
                        const char* s = szFromPos + lindex + 1;
                        char c;
                        int in_escape = 0;
                        int valuelen = 0;
                        getEnoughBuffer(pContext, 1024);
                        pContext->valueBuffer[0] = '/0';
                        while(1)
                        {
                                c = *s;
                                assert(c);
                                if(!in_escape)
                                {
                                        if(c == '//')
                                        {
                                                in_escape = 1;
                                        }
                                        else if(c == ']')
                                        {
                                                break;
                                        }
                                        else
                                        {
                                                getEnoughBuffer(pContext, valuelen + 1);
                                                pContext->valueBuffer[valuelen++] = c;
                                        }
                                }
                                else
                                {
                                        //ignore the newline after '/'
                                        if(c != '/r' && c != '/n')
                                        {
                                                getEnoughBuffer(pContext, valuelen + 1);
                                                pContext->valueBuffer[valuelen++] = c;
                                        }
                                        else
                                        {
                                                char nc = *(s+1);
                                                if(nc)
                                                {
                                                        if((c=='/r' && nc=='/n') || (c=='/n' && nc=='/r'))
                                                                s++;
                                                }
                                        }
                                        in_escape = 0;
                                }
                                s++;
                        }
                        getEnoughBuffer(pContext, valuelen + 1);
                        pContext->valueBuffer[valuelen] = '/0';

                        if(pContext->pfnOnProperty) 
                                pContext->pfnOnProperty(pContext, pContext->idBuffer, pContext->valueBuffer);

                        return (s - szCollection + 1);
                }
                else
                {
                        int rindex = findchar(szFromPos, -1, ']');
                        int nNeedBufferSize = rindex - lindex - 1;
                        assert(rindex >= 0);
                        getEnoughBuffer(pContext, nNeedBufferSize);
                        memcpy(pContext->valueBuffer, szFromPos + lindex + 1, nNeedBufferSize);
                        pContext->valueBuffer[nNeedBufferSize] = '/0';

                        if(pContext->pfnOnProperty)
                                pContext->pfnOnProperty(pContext, pContext->idBuffer, pContext->valueBuffer);

                        return (fromPos + rindex + 1);
                }
        }
        return -1;
}

第二步，解析节点（parseNode）。分号“;”跟后面N个属性，一个while循环调用parseProperty()逐个解析属性即可：

//Node: ; {property}
int parseNode(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
        const char* szFromPos = szCollection + fromPos;
        assert(fromPos >= 0);
        //assert(szFromPos[0] == ';');

        if(pContext->pfnOnNode) 
                pContext->pfnOnNode(pContext, szFromPos);

        if(szFromPos[0] == ';')
        {
                fromPos++; szFromPos++;
        }

        while(1)
        {
                fromPos += skipSpaceChars(szFromPos, NULL);
                if(szCollection[fromPos] == '/0' || findchar(";)(", -1, szCollection[fromPos]) >= 0)
                        break;
                fromPos = parseProperty(pContext, szCollection, fromPos);
                szFromPos = szCollection + fromPos;
        }
        return fromPos;
}

第三步，解析节点序列（parseNodeSequence）。节点的顺序排列，至少有一个节点，后面可能还有0个或多个节点。仍然是一个while循环搞定：

//NodeSequence: node{node}
int parseNodeSequence(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
        const char* szFromPos = szCollection + fromPos;
        assert(fromPos >= 0);
        //assert(szFromPos[0] == ';');
        while(1)
        {
                fromPos = parseNode(pContext, szCollection, fromPos);
                fromPos += skipSpaceChars(szFromPos, NULL);
                szFromPos = szCollection + fromPos;
                if(szFromPos[0] != ';')
                {
                        if(pContext->pfnOnNodeEnd)
                                pContext->pfnOnNodeEnd(pContext);
                        break;
                }
        }
        return fromPos; 
}

第四步，解析树（parseGameTree）。树是一个嵌套结构，最外层是一对括号“(”“)”，里面是N个节点序列或N个嵌套的子树。仍然用一个while循环搞定，遇到“(”则递归调用parseGameTree()解析树或其子树，否则调用parseNodeSequence()解析节点序列。代码如下：

//GameTree: ( {[NodeSequence]|[GameTree]} )
//old GameTree: ( NodeSequence {GameTree} )
int parseGameTree(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
        char c;
        const char* szFromPos = szCollection + fromPos;
        assert(fromPos >= 0);
        assert(szFromPos[0] == '(');

        pContext->treeIndex++;
        if(pContext->pfnOnTree)
                pContext->pfnOnTree(pContext, szFromPos, pContext->treeIndex);

        fromPos++; szFromPos++;
        fromPos += skipSpaceChars(szFromPos, NULL);

        c = szCollection[fromPos];
        while(1)
        {
                if(c == '(')
                        fromPos = parseGameTree(pContext, szCollection, fromPos);
                else
                        fromPos = parseNodeSequence(pContext, szCollection, fromPos);

                szFromPos = szCollection + fromPos;
                fromPos += skipSpaceChars(szFromPos, NULL);
                c = szCollection[fromPos];
                if(c == ')')
                {
                        if(pContext->pfnOnTreeEnd)
                                pContext->pfnOnTreeEnd(pContext, pContext->treeIndex);
                        pContext->treeIndex--;
                        break;
                }
        }

        return (fromPos + 1);
}

第五步，最后一步了，解析整个SGF文本内容（parseSGF）。这是对外公开的核心接口。N个树的顺序排列，好办呀，循环调用parseGameTree()顺序解析各个树不就OK了？代码如下：

//SGFCollection: GameTree {GameTree}
int parseSGF(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
        const char* szFromPos = szCollection + fromPos;
        assert(fromPos >= 0);
        assert(szFromPos[0] == '(');
        pContext->treeIndex = -1;
        while(1)
        {
                fromPos = parseGameTree(pContext, szCollection, fromPos);
                fromPos += skipSpaceChars(szFromPos, NULL);
                szFromPos = szCollection + fromPos;
                if(szFromPos[0] != '(')
                        break;
        }
        return fromPos; 
}

测试代码：

int main(int argc, char *argv[])
{
        char* s;
        int x;
        SGFParseContext Context;
        //initSGFParseContext(&Context, onTree, onTreeEnd, onNode, onNodeEnd, onProperty, NULL);
        initSGFParseContext(&Context, onTree2, onTreeEnd2, onNode2, onNodeEnd2, onProperty2, NULL);

        //test parse property:
        {
                s = "AB[cdef]X[xyz]";
                printf("/ntest parse property: ----- /n");
                x = parseProperty(&Context, s, 0);
                x = parseProperty(&Context, s, 8);
                s = "C[ab//]cd]";
                x = parseProperty(&Context, s, 0);
        }
        //test parse node:
        {
                s = ";A[a]BB[bb]C[]";
                printf("/ntest parse node: ----- /n");
                x = parseNode(&Context, s, 0);
                s = ";A[a];BB[bb]C[]";
                x = parseNode(&Context, s, 0);
                x = parseNodeSequence(&Context, s, 0);
        }
        //test parse tree:
        {
                printf("/ntest parse tree: ----- /n");
                s = "(;A[a](;C[c](X[x])Z[z]);D[d](;E[e](F[ff])))";
                x = parseGameTree(&Context, s, 0);
        }

#if 1
        //parse real sgf file:
        {
                int len = 0;
                void* data = NULL;
                FILE* pfile = fopen("d://x.txt", "r");
                printf("/n---------- test parse real sgf file: -------- /n");
                if(pfile)
                {
                        fseek(pfile, 0, SEEK_END);
                        len = ftell(pfile);
                        assert(len > 0);
                        fseek(pfile, 0, SEEK_SET);
                        data = malloc(len);
                        assert(data);
                        fread(data, 1, len, pfile);

                        parseSGF(&Context, data, 0);

                        fclose(pfile);
                        pfile = NULL;
                }
        }
#endif

        {
                char c;
                printf("/n----- any key to exit: ----- /n");
                fflush(stdout);
                scanf("%c", &c);
        }
}

总结：整个SGF解析器结构比较清晰，只要按照EBNF定义，按部就班地逐步处理即可，不是特别复杂。但由于牵涉到文本、指针、递归，有许多细节需要注意。各位朋友不妨评估一下，自己需要花费多久可以写出类似这样一个SGF解析器？如果时间充裕，也不妨真的动手写一下，看看是否眼高手低呢？所谓的“重复发明轮子”，并非绝对的毫无意义，至少可以锻炼我的动手能力。

　　另外，有一个设计上的取舍，不知是较好还是较坏。所有的回调函数，目前都有一个 SGFParseContext* pContext ，而此前相同位置的参数是 void* pUserData。是后来考虑到回调函数可能需要访问SGFParseContext中的相关数据（如在PFN_ON_NODE中读取treeIndex），为了方便用户使用才引入pContext参数（用户也可以通过pUserData自行传入pContext，终究是多了一步）。目前的做法，似乎暴露了解析器内部结构（SGFParseContext），又似乎增强了回调函数的稳定性和扩展性（即使不改变函数原形也能通过pContext提供额外参数）。

　　虽然这个SGF解析器已应用到开源软件“M8围棋谱”(http://code.google.com/p/m8weiqipu/)中，并初步达到了实用目的，但并不能保证该解析器已达到工业强度，其实有不少情况尚未测试到，必然会有疏忽错漏之处，诚请各位朋友批评指正。

　　另注，考虑到与现有SGF格式文件的兼容性，对SGF规范中的EBNF稍做了一定扩展。

　　完整源代码请参见：
http://code.google.com/p/m8weiqipu/source/browse/trunk/sgf.h
http://code.google.com/p/m8weiqipu/source/browse/trunk/sgf.c

#ifndef __SGF_H__
#define __SGF_H__

// SGF parser, by liigo
// email: com.liigo_at_gmail.com
// blog:  http://blog.csdn.net/liigo
// www.tianqiweiqi.com


#ifdef __cplusplus
	extern "C" {
#endif

typedef struct _tagSGFParseContext SGFParseContext;

typedef void (*PFN_ON_PROPERTY) (SGFParseContext* pContext, const char* szID, const char* szValue);
typedef void (*PFN_ON_NODE) (SGFParseContext* pContext, const char* szNodeHeader);
typedef void (*PFN_ON_NODE_END) (SGFParseContext* pContext);
typedef void (*PFN_ON_TREE) (SGFParseContext* pContext, const char* szTreeHeader, int treeIndex);
typedef void (*PFN_ON_TREE_END) (SGFParseContext* pContext, int treeIndex);

typedef struct _tagSGFParseContext
{
	void* pUserData;
	int treeIndex;

	PFN_ON_TREE pfnOnTree;
	PFN_ON_TREE_END pfnOnTreeEnd;
	PFN_ON_NODE pfnOnNode;
	PFN_ON_NODE_END pfnOnNodeEnd;
	PFN_ON_PROPERTY pfnOnProperty;

	char idBuffer[16];
	char* valueBuffer;
	int valueBufferSize;
}
SGFParseContext;

void initSGFParseContext(SGFParseContext* pContext,
						 PFN_ON_TREE pfnOnTree, PFN_ON_TREE_END pfnOnTreeEnd,
						 PFN_ON_NODE pfnOnNode, PFN_ON_NODE_END pfnOnNodeEnd, 
						 PFN_ON_PROPERTY pfnOnProperty, 
						 void* pUserData);

void cleanupSGFParseContext(SGFParseContext* pContext);

int parseSGF(SGFParseContext* pContext, const char* szCollection, int fromPos);

int parseGameTree(SGFParseContext* pContext, const char* szCollection, int fromPos);
int parseNodeSequence(SGFParseContext* pContext, const char* szCollection, int fromPos);
int parseNode(SGFParseContext* pContext, const char* szCollection, int fromPos);
int parseProperty(SGFParseContext* pContext, const char* szCollection, int fromPos);


#ifdef __cplusplus
	}
#endif

#endif //__SGF_H__

#include <stdlib.h>
#include <stdio.h>
#include <memory.h>
#include <ctype.h>
#include <assert.h>

#include "sgf.h"

// SGF parser, by liigo
// email: com.liigo_at_gmail.com
// blog:  http://blog.csdn.net/liigo
// www.tianqiweiqi.com

int findchar(const char* sz, size_t n, char c)
{
	size_t i;
	if(n == -1)
	{
		char x;
		i = 0;
		while( x = sz[i] )
		{
			if(x == c) return i;
			i++;
		}
		return -1;
	}
	else
	{
		for(i = 0; i < n; i++)
		{
			if(sz[i] == c) return i;
		}
		return -1;
	}
}

//return the first non-space char's index
int skipSpaceChars(const char* s, char* endchars)
{
	const char* p = s;
	char c;
	assert(s);
	while(c = *p)
	{
		if(endchars)
		{
			if(findchar(endchars, -1, c) >= 0)
				return (p - s);
		}
		if(!isspace(c)) return (p - s);
		p++;
	}
	return (p - s);
}

//skipChars or endchars can be NULL, but can not all NULL
int skipChars(const char* s, char* skipChars, char* endchars)
{
	const char* p = s;
	char c;
	assert(s && (skipChars || endchars));
	while(c = *p)
	{
		if(skipChars)
		{
			if(findchar(skipChars, -1, c) == -1)
				return (p - s);
		}
		if(endchars)
		{
			if(findchar(endchars, -1, c) >= 0)
				return (p - s);
		}
		p++;
	}
	return (p - s);
}

int isTextPropertyID(const char* szID)
{
	static const char* textIDs[] = { "C", "N", "AP", "CA", "AN", "BR", "BT", "CP", "DT", "EV",
									 "GN", "GC", "ON", "OT", "PB", "PC", "PW", "RE", "RO", "RU",
									 "SO", "US", "WR", "WT", "FG", "LB" };
	static int n = sizeof(textIDs) / sizeof(textIDs[0]);
	int i;
	for(i = 0; i < n; i++)
	{
		if(_stricmp(szID, textIDs[i]) == 0)
			return 1;
	}
	return 0;
}

void getEnoughBuffer(SGFParseContext* pContext, int nNeedBufferSize)
{			
	if(pContext->valueBuffer == NULL || nNeedBufferSize > pContext->valueBufferSize)
	{
		int buffSize = pContext->valueBufferSize;
		if(buffSize == 0) buffSize = 1024;
		while(buffSize < nNeedBufferSize) buffSize *= 2;

		if(pContext->valueBuffer == NULL)
			pContext->valueBuffer = malloc(buffSize + 4);
		else
			pContext->valueBuffer = realloc(pContext->valueBuffer, buffSize + 4);
		pContext->valueBufferSize = buffSize;
	}
}

// [value]
int parsePropertyValue(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
	const char* szFromPos = szCollection + fromPos;
	assert(szFromPos[0] == '[');

	if(!isTextPropertyID(pContext->idBuffer))
	{
		int rindex = findchar(szFromPos, -1, ']');
		int nNeedBufferSize = rindex - 1;
		assert(rindex >= 0);
		getEnoughBuffer(pContext, nNeedBufferSize);
		memcpy(pContext->valueBuffer, szFromPos + 1, nNeedBufferSize);
		pContext->valueBuffer[nNeedBufferSize] = '\0';

		if(pContext->pfnOnProperty)
			pContext->pfnOnProperty(pContext, pContext->idBuffer, pContext->valueBuffer);

		return (fromPos + rindex + 1);
	}
	else
	{
		//parse the text or simple-text value, consider the '\' escape character
		const char* s = szFromPos + 1;
		char c;
		int in_escape = 0;
		int valuelen = 0;
		getEnoughBuffer(pContext, 1024);
		pContext->valueBuffer[0] = '\0';
		while(1)
		{
			c = *s;
			assert(c);
			if(!in_escape)
			{
				if(c == '\\')
				{
					in_escape = 1;
				}
				else if(c == ']')
				{
					break;
				}
				else
				{
					getEnoughBuffer(pContext, valuelen + 1);
					pContext->valueBuffer[valuelen++] = c;
				}
			}
			else
			{
				//ignore the newline after '\'
				if(c != '\r' && c != '\n')
				{
					getEnoughBuffer(pContext, valuelen + 1);
					pContext->valueBuffer[valuelen++] = c;
				}
				else
				{
					char nc = *(s+1);
					if(nc)
					{
						if((c=='\r' && nc=='\n') || (c=='\n' && nc=='\r'))
							s++;
					}
				}
				in_escape = 0;
			}
			s++;
		}
		getEnoughBuffer(pContext, valuelen + 1);
		pContext->valueBuffer[valuelen] = '\0';

		if(pContext->pfnOnProperty) 
			pContext->pfnOnProperty(pContext, pContext->idBuffer, pContext->valueBuffer);

		return (s - szCollection + 1);
	}
}

//Property: id [value] { [value] }
int parseProperty(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
	const char* szFromPos;
	int lindex;
	int nIDBufferSize = sizeof(pContext->idBuffer) - 1;
	assert(szCollection && fromPos >= 0);
	szFromPos = szCollection + fromPos;

	lindex = findchar(szFromPos, -1, '[');
	assert(lindex > 0 && lindex < nIDBufferSize);
	if(lindex > 0 && lindex < nIDBufferSize)
	{
		memcpy(pContext->idBuffer, szFromPos, lindex);
		pContext->idBuffer[lindex] = '\0';

		fromPos = parsePropertyValue(pContext, szCollection, fromPos + lindex);
		szFromPos = szCollection + fromPos;
		while(1)
		{
			fromPos += skipSpaceChars(szFromPos, NULL);
			if(szCollection[fromPos] != '[')
				break;
			fromPos = parsePropertyValue(pContext, szCollection, fromPos);
			szFromPos = szCollection + fromPos;
		}
		return fromPos;
	}
	return -1;
}

//Node: ; {property}
int parseNode(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
	const char* szFromPos = szCollection + fromPos;
	assert(fromPos >= 0);
	//assert(szFromPos[0] == ';');

	if(pContext->pfnOnNode) 
		pContext->pfnOnNode(pContext, szFromPos);

	if(szFromPos[0] == ';')
	{
		fromPos++; szFromPos++;
	}

	while(1)
	{
		fromPos += skipSpaceChars(szFromPos, NULL);
		if(szCollection[fromPos] == '\0' || findchar(";)(", -1, szCollection[fromPos]) >= 0)
			break;
		fromPos = parseProperty(pContext, szCollection, fromPos);
		szFromPos = szCollection + fromPos;
	}
	return fromPos;
}

//NodeSequence: node{node}
int parseNodeSequence(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
	const char* szFromPos = szCollection + fromPos;
	assert(fromPos >= 0);
	//assert(szFromPos[0] == ';');
	while(1)
	{
		fromPos = parseNode(pContext, szCollection, fromPos);
		fromPos += skipSpaceChars(szFromPos, NULL);
		szFromPos = szCollection + fromPos;
		if(szFromPos[0] != ';')
		{
			if(pContext->pfnOnNodeEnd)
				pContext->pfnOnNodeEnd(pContext);
			break;
		}
	}
	return fromPos;	
}

//GameTree: ( {[NodeSequence]|[GameTree]} )
//old GameTree: ( NodeSequence {GameTree} )
int parseGameTree(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
	char c;
	const char* szFromPos = szCollection + fromPos;
	assert(fromPos >= 0);
	assert(szFromPos[0] == '(');

	pContext->treeIndex++;
	if(pContext->pfnOnTree)
		pContext->pfnOnTree(pContext, szFromPos, pContext->treeIndex);

	fromPos++; szFromPos++;
	fromPos += skipSpaceChars(szFromPos, NULL);

	c = szCollection[fromPos];
	while(1)
	{
		if(c == '(')
			fromPos = parseGameTree(pContext, szCollection, fromPos);
		else
			fromPos = parseNodeSequence(pContext, szCollection, fromPos);

		szFromPos = szCollection + fromPos;
		fromPos += skipSpaceChars(szFromPos, NULL);
		c = szCollection[fromPos];
		if(c == ')')
		{
			if(pContext->pfnOnTreeEnd)
				pContext->pfnOnTreeEnd(pContext, pContext->treeIndex);
			pContext->treeIndex--;
			break;
		}
	}

	return (fromPos + 1);
}

//SGFCollection: GameTree {GameTree}
int parseSGF(SGFParseContext* pContext, const char* szCollection, int fromPos)
{
	const char* szFromPos = szCollection + fromPos;
	assert(fromPos >= 0);
	assert(szFromPos[0] == '(');
	pContext->treeIndex = -1;
	while(1)
	{
		fromPos = parseGameTree(pContext, szCollection, fromPos);
		fromPos += skipSpaceChars(szFromPos, NULL);
		szFromPos = szCollection + fromPos;
		if(szFromPos[0] != '(')
			break;
	}
	return fromPos;	
}

void initSGFParseContext(SGFParseContext* pContext,
						 PFN_ON_TREE pfnOnTree, PFN_ON_TREE_END pfnOnTreeEnd,
						 PFN_ON_NODE pfnOnNode, PFN_ON_NODE_END pfnOnNodeEnd, 
						 PFN_ON_PROPERTY pfnOnProperty, 
						 void* pUserData)
{
	assert(pContext);
	pContext->pfnOnTree = pfnOnTree;
	pContext->pfnOnTreeEnd = pfnOnTreeEnd;
	pContext->pfnOnNode = pfnOnNode;
	pContext->pfnOnNodeEnd = pfnOnNodeEnd;
	pContext->pfnOnProperty = pfnOnProperty;
	pContext->pUserData = pUserData;
	pContext->idBuffer[0] = '\0';
	pContext->valueBuffer = NULL;
	pContext->valueBufferSize = 0;
	pContext->treeIndex = -1;
}

void cleanupSGFParseContext(SGFParseContext* pContext)
{
	if(pContext->valueBuffer)
		free(pContext->valueBuffer);
	memset(pContext, 0, sizeof(SGFParseContext));
}

//
//-----------------------------------------------------------------------------
// 以下是测试代码

static void printheader(const char* s, int n)
{
	int i;
	for(i = 0; i < n; i++)
	{
		if(s[i] == '\0') break;
		printf("%c", s[i]);
	}
	printf(" ... \n");
	fflush(stdout);
}

static void onProperty(SGFParseContext* pContext, const char* szID, const char* szValue)
{
	printf("    onProperty: %s[%s] \n", szID, szValue);
	fflush(stdout);
}

static void onNode(SGFParseContext* pContext, const char* szNodeHeader)
{
	printf("  onNode: ");
	printheader(szNodeHeader, 32);
}

static void onNodeEnd(SGFParseContext* pContext)
{
	printf("  onNodeEnd \n");
}


static void onTree(SGFParseContext* pContext, const char* szTreeHeader, int treeIndex)
{
	printf("  onTree: ");
	printheader(szTreeHeader, 32);
}

static void onTreeEnd(SGFParseContext* pContext, int treeIndex)
{
	printf("  onTreeEnd \n");
}

//------------------

static void onProperty2(SGFParseContext* pContext, const char* szID, const char* szValue)
{
	printf("%s[%s] ", szID, szValue);
	//fflush(stdout);
}

static void onNode2(SGFParseContext* pContext, const char* szNodeHeader)
{
	printf("; ");
}

static void onNodeEnd2(SGFParseContext* pContext)
{
}

static void onTree2(SGFParseContext* pContext, const char* szTreeHeader, int treeIndex)
{
	int i;
	printf("\n");
	for(i = 0; i < treeIndex; i++)
		printf("  ");
	printf("[tree:%d]( ", treeIndex);
}

static void onTreeEnd2(SGFParseContext* pContext, int treeIndex)
{
	printf(") [end of tree %d]\n\n", treeIndex);
}

static void testParseSGFFile(SGFParseContext* pContext, const char* szFileName)
{
	int len = 0;
	void* data = NULL;
	FILE* pfile = fopen(szFileName, "r");
	printf("\n\n--- test parse sgf file: %s ---\n", szFileName);
	if(pfile)
	{
		fseek(pfile, 0, SEEK_END);
		len = ftell(pfile);
		assert(len > 0);
		fseek(pfile, 0, SEEK_SET);
		data = malloc(len);
		assert(data);
		fread(data, 1, len, pfile);

		parseSGF(pContext, data, 0);

		fclose(pfile);
		pfile = NULL;
	}
	else
	{
		printf("\n\n--- open sgf file error: %s ---\n\n", szFileName);
	}
}

////////////////////


int main(int argc, char *argv[])
{
	char* s;
	int x;
	SGFParseContext Context;
	//initSGFParseContext(&Context, onTree, onTreeEnd, onNode, onNodeEnd, onProperty, NULL);
	initSGFParseContext(&Context, onTree2, onTreeEnd2, onNode2, onNodeEnd2, onProperty2, NULL);

	//test parse property:
	{
		s = "A[a]B[b1][b2]X[xyz]";
		printf("\ntest parse property: ----- \n");
		x = parseProperty(&Context, s, 0);
		x = parseProperty(&Context, s, 4);
		s = "C[ab\\]cd]";
		x = parseProperty(&Context, s, 0);
	}
	//test parse node:
	{
		s = ";A[a]BB[bb]C[]";
		printf("\ntest parse node: ----- \n");
		x = parseNode(&Context, s, 0);
		s = ";A[a];BB[bb]C[]";
		x = parseNode(&Context, s, 0);
		x = parseNodeSequence(&Context, s, 0);
	}
	//test parse tree:
	{
		printf("\ntest parse tree: ----- \n");
		s = "(;A[a](;C[c](X[x])Z[z]);D[d](;E[e](F[ff])))";
		x = parseGameTree(&Context, s, 0);
	}

#if 1
	//parse real sgf file:
	{
		char filename[256];
		int i;

		for(i = 1; i <= 7; i++)
		{
			sprintf(filename, "%d.sgf", i);
			testParseSGFFile(&Context, filename);
		}

		for(i = 0; i <= 365; i++)
		{
			sprintf(filename, "WuQingYuan\\wqy (%d).sgf", i);
			testParseSGFFile(&Context, filename);
		}

		for(i = 1; i <= 465; i++)
		{
			sprintf(filename, "标准中国流645局\\S%05d.sgf", i);
			testParseSGFFile(&Context, filename);
		}
	}
#endif

	{
		char c;
		printf("\n----- any key to exit: ----- \n");
		fflush(stdout);
		scanf("%c", &c);
	}
}

作者：

喜欢围棋和编程。查看的所有文章

作者：

发表回复 取消回复

发表回复取消回复