Skip to content
This repository was archived by the owner on Jan 4, 2025. It is now read-only.

Commit 1f50e2f

Browse files
committed
initial, somewhat cleaned-up reader
0 parents  commit 1f50e2f

File tree

1 file changed

+216
-0
lines changed

1 file changed

+216
-0
lines changed

sexpr.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
#!/usr/bin/env python
2+
##
3+
## sexpr.py - by Yusuke Shinyama
4+
##
5+
## * public domain *
6+
##
7+
## Stripped down for Lark by Scott Wolchok.
8+
9+
10+
class SExprIllegalClosingParenError(ValueError):
    '''Raised for a closing parenthesis that has no matching open list.'''
12+
class SExprIllegalClosingQuoteError(ValueError):
    '''Raised when a quoted string is closed by something other than a quote.'''
14+
class SExprPrematureEOFError(ValueError):
    '''Raised when input ends while a parenthesized list is still open.'''
16+
17+
class SExprReader(object):
    '''Incremental S-expression reader that pushes parsed items to a consumer.

    Usage:

      reader = SExprReader(consumer)
      reader.feed('(this is (sexpr))')
      reader.close()

    The consumer (``next_filter``) must provide a ``feed(item)`` method.
    It is called once per completed top-level item: a string for a bare
    symbol or a quoted string, or a (possibly nested) list for a
    parenthesized expression.

    Every delimiter argument is a string of characters; a character
    belongs to a class when it occurs in the corresponding string.
    '''

    COMMENT_BEGIN = ';'
    COMMENT_END = '\n'
    SEPARATOR = ' \t\n'
    PAREN_BEGIN = '('
    PAREN_END = ')'
    QUOTE = '"'
    ESCAPE = '\\'

    def __init__(self, next_filter,
                 comment_begin=COMMENT_BEGIN,
                 comment_end=COMMENT_END,
                 separator=SEPARATOR,
                 paren_begin=PAREN_BEGIN,
                 paren_end=PAREN_END,
                 quote=QUOTE,
                 escape=ESCAPE):
        '''Create a reader that forwards completed items to *next_filter*.

        next_filter   -- consumer object with a ``feed(item)`` method.
        comment_begin -- characters that start a comment.
        comment_end   -- characters that end a comment.
        separator     -- characters that separate symbols.
        paren_begin   -- characters that open a list.
        paren_end     -- characters that close a list.
        quote         -- characters that open/close a quoted string.
        escape        -- characters that escape the following character.
        '''
        self.next_filter = next_filter
        self.comment_begin = comment_begin
        self.comment_end = comment_end
        self.separator = separator
        self.paren_begin = paren_begin
        self.paren_end = paren_end
        self.quote = quote
        self.escape = escape
        # Every character that is not simply appended to the current symbol.
        self.special = (comment_begin + separator + paren_begin +
                        paren_end + quote + escape)
        self.reset()
        # All bare symbols seen so far; reset() does not clear this set.
        self.symbols = set()

    # Error hooks: each raises by default; subclasses may override them.

    # called if a quoted string is closed by a non-quote character.
    def illegal_close_quote(self, i):
        raise SExprIllegalClosingQuoteError(i)

    # called if redundant parentheses are found (i is the character index).
    def illegal_close_paren(self, i):
        raise SExprIllegalClosingParenError(i)

    # called if input ends inside a list (i is the number of open lists,
    # x is the outermost partially built list).
    def premature_eof(self, i, x):
        raise SExprPrematureEOFError(i, x)

    # reset the internal states.
    def reset(self):
        '''Discard any partially parsed input and return self.'''
        self.incomment = False   # if within a comment.
        self.inquote = False     # if within a quote.
        self.inescape = False    # if the previous character was an escape.
        self.sym = []            # characters of the partially built symbol.
        # NOTICE: None != nil (an empty list)
        self.build = None        # partially constructed list.
        self.build_stack = []    # chain of enclosing partial lists.
        return self

    def close_str(self):
        '''Return the buffered characters as a string and clear the buffer.'''
        sym = ''.join(self.sym)
        self.sym = []
        return sym

    def close_symbol(self):
        '''Finish the buffered symbol, intern it, record it and return it.'''
        # intern() is a builtin on Python 2 but lives in sys on Python 3.
        try:
            from sys import intern as intern_symbol
        except ImportError:
            intern_symbol = intern  # Python 2 builtin
        sym = intern_symbol(self.close_str())
        self.symbols.add(sym)
        return sym

    def feed_next(self, s):
        '''Hand a completed top-level item to the consumer.'''
        self.next_filter.feed(s)

    # analyze strings
    def feed(self, tokens):
        '''Consume *tokens* one element at a time and return self.

        Parser state is kept between calls, so input may be supplied in
        several pieces. Completed top-level items are passed to
        feed_next(). An unmatched closing parenthesis is reported through
        illegal_close_paren() with the index of the offending character
        within *tokens*.

        NOTE: a symbol or string is only emitted when its buffer is
        non-empty, so an empty quoted string ("") produces no item.
        '''
        for (i, c) in enumerate(tokens):
            if self.incomment:
                # within a comment - skip until a comment terminator.
                self.incomment = (c not in self.comment_end)
            elif self.inescape or (c not in self.special):
                # ordinary or escaped character: add to the working symbol.
                self.sym.append(c)
                self.inescape = False
            elif c in self.escape:
                # escape: take the next character literally.
                self.inescape = True
            elif self.inquote and (c not in self.quote):
                # special characters are literal inside a quoted string.
                self.sym.append(c)
            else:
                # special character (blanks, parentheses, quote or comment)
                if self.sym:
                    # close the current symbol
                    if self.inquote:
                        if c not in self.quote:
                            self.illegal_close_quote(i)
                        # quoted strings are neither interned nor recorded.
                        sym = self.close_str()
                    else:
                        sym = self.close_symbol()
                    if self.build is None:
                        self.feed_next(sym)
                    else:
                        self.build.append(sym)
                if c in self.comment_begin:
                    # comment
                    self.incomment = True
                elif c in self.quote:
                    # quote: toggle quoted-string mode.
                    self.inquote = not self.inquote
                elif c in self.paren_begin:
                    # beginning a new list; remember the enclosing one
                    # (None at the top level).
                    self.build_stack.append(self.build)
                    empty = []
                    if self.build is not None:
                        # nest the new list at the end of the current list.
                        self.build.append(empty)
                    self.build = empty
                elif c in self.paren_end:
                    # terminating the current list
                    if self.build is None:
                        # there must be a working list.
                        self.illegal_close_paren(i)
                    else:
                        if len(self.build_stack) == 1:
                            # the outermost list is complete: emit it.
                            self.feed_next(self.build)
                        self.build = self.build_stack.pop()
        return self

    # terminate
    def terminate(self):
        '''Signal end of input, flush any pending symbol and return self.

        If a list is still open, the pending symbol is appended to it,
        the list state is cleared and premature_eof() is called with the
        number of unclosed lists and the outermost partial list.
        '''
        # a working list should not exist.
        if self.build is not None:
            # error - still try to construct a partial structure.
            if self.sym:
                self.build.append(self.close_symbol())
            # Capture the nesting depth before the stack is cleared below;
            # otherwise the reported depth would always be 0.
            depth = len(self.build_stack)
            if depth == 1:
                outermost = self.build
            else:
                # build_stack[0] is the top-level None placeholder.
                outermost = self.build_stack[1]
            self.build = None
            self.build_stack = []
            self.premature_eof(depth, outermost)
        elif self.sym:
            # flush the current working symbol.
            self.feed_next(self.close_symbol())
        return self

    # closing.
    def close(self):
        '''Finish parsing; same as terminate() but returns None.'''
        self.terminate()
167+
168+
169+
## str2sexpr
170+
##
171+
class _SExprStrConverter(object):
    '''Consumer that collects every item it is fed into one shared list.'''

    # Class-level accumulator: feed() appends to this class attribute,
    # not to a per-instance list.
    results = []

    def feed(self, s):
        '''Append *s* to the class-wide ``results`` list.'''
        collected = _SExprStrConverter.results
        collected.append(s)
176+
# Shared module-level reader whose consumer is a _SExprStrConverter instance.
_str_converter = SExprReader(_SExprStrConverter())
177+
# _str_converter_strict = StrictSExprReader(_SExprStrConverter())
178+
179+
def str2sexpr(s):
    '''Parse the string *s* and return the list of its top-level items.

    Uses the shared module-level reader and the class-level result list
    of _SExprStrConverter, both of which are reset at the start of each
    call.
    '''
    _SExprStrConverter.results = []
    reader = _str_converter.reset()
    reader.feed(s)
    reader.terminate()
    return _SExprStrConverter.results
184+
# def str2sexpr_strict(s):
185+
# '''parse a string as a sexpr.'''
186+
# _SExprStrConverter.results = []
187+
# _str_converter_strict.reset().feed(s).terminate()
188+
# return _SExprStrConverter.results
189+
190+
191+
## sexpr2str
192+
##
193+
def sexpr2str(e):
    '''Render the sexpr *e* in Lisp-like notation.

    A list becomes its space-separated, recursively rendered elements
    wrapped in parentheses; anything else is returned unchanged.
    '''
    if isinstance(e, list):
        rendered = [sexpr2str(item) for item in e]
        return '(' + ' '.join(rendered) + ')'
    return e
198+
199+
200+
# test stuff
201+
def test():
    '''Check str2sexpr against a table of (input, expected result) pairs.'''
    cases = (
        ('"string"', ['string']),
        ('\\"string\\"', ['"string"']),
        ('(this ;comment\n is (a test (sentences) (des()) (yo)))',
         [['this', 'is', ['a', 'test', ['sentences'], ['des', []], ['yo']]]]),
        ('''(paren\\(\\)theses_in\\#symbol "space in \nsymbol"
this\\ way\\ also. "escape is \\"better than\\" quote")''',
         [['paren()theses_in#symbol', 'space in \nsymbol', 'this way also.', 'escape is "better than" quote']]),
        ('()', [[]]),
    )
    for text, expected in cases:
        assert str2sexpr(text) == expected
    # str2sexpr('(this (is (a (parial (sentence')
212+
213+
214+
# main
215+
# Run the self-test only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test()

0 commit comments

Comments
 (0)