ODFPY 1.2.0
 
Loading...
Searching...
No Matches
teletype.py
Go to the documentation of this file.
1# -*- coding: utf-8 -*-
2#
3# Create and extract text from ODF, handling whitespace correctly.
4# Copyright (C) 2008 J. David Eisenberg
5#
6# This program is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License along
17# with this program; if not, write to the Free Software Foundation, Inc.,
18# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19
20
21
28
29
30from odf.element import Node
32from odf.text import S,LineBreak,Tab
33
35
class WhitespaceText(object):
    """Write plain text into an ODF element, keeping its whitespace.

    Tabs, newlines and runs of spaces are emitted as dedicated child
    elements (``Tab``, ``LineBreak`` and ``S``); everything else is
    collected in a buffer and written out as text.
    """

    def __init__(self):
        # Characters collected so far that have not yet been written out.
        self.textBuffer = []
        # Number of spaces the next <text:s> element has to stand for.
        self.spaceCount = 0

    def addTextToElement(self, odfElement, s):
        """Append the string *s* to *odfElement*.

        A tab becomes a ``Tab`` element and a newline a ``LineBreak``
        element.  The first space of a run is kept as ordinary text;
        any further spaces of that run are written as a single ``S``
        element whose ``c`` attribute is their count.

        odfElement -- receives the output through its ``addText`` and
                      ``addElement`` methods.
        s          -- the string to process.
        """
        # When we encounter a tab or newline, we can immediately
        # dump any accumulated text and then emit the appropriate
        # ODF element.
        #
        # When we encounter a space, we add it to the text buffer,
        # and then collect more spaces. If there are more spaces
        # after the first one, we dump the text buffer and then
        # emit the appropriate <text:s> element.
        i = 0
        length = len(s)
        while i < length:
            ch = s[i]
            i += 1
            if ch == '\t':
                self._emitTextBuffer(odfElement)
                odfElement.addElement(Tab())
            elif ch == '\n':
                self._emitTextBuffer(odfElement)
                odfElement.addElement(LineBreak())
            elif ch == ' ':
                self.textBuffer.append(' ')
                # Count only the spaces that follow the first one.
                self.spaceCount = 0
                while i < length and s[i] == ' ':
                    self.spaceCount += 1
                    i += 1
                if self.spaceCount > 0:
                    # The text (including the first space) has to be
                    # written before the <text:s> element to keep order.
                    self._emitTextBuffer(odfElement)
                    self._emitSpaces(odfElement)
            else:
                self.textBuffer.append(ch)

        # Write out whatever text is left after the last character.
        self._emitTextBuffer(odfElement)

    def _emitTextBuffer(self, odfElement):
        """Add the buffered characters to *odfElement* as text and
        empty the buffer.  Does nothing when the buffer is empty."""
        if len(self.textBuffer) > 0:
            odfElement.addText(''.join(self.textBuffer))
        self.textBuffer = []

    def _emitSpaces(self, odfElement):
        """Add an ``S`` element for the current ``spaceCount`` to
        *odfElement* and reset the count.  No element is added when
        the count is zero."""
        if self.spaceCount > 0:
            spaceElement = S(c=self.spaceCount)
            odfElement.addElement(spaceElement)
        self.spaceCount = 0
106
def addTextToElement(odfElement, s):
    """Append the string *s* to *odfElement*, handling its whitespace.

    Convenience wrapper: creates a fresh ``WhitespaceText`` and lets
    its ``addTextToElement`` method do the work.
    """
    wst = WhitespaceText()
    wst.addTextToElement(odfElement, s)
110
111
def extractText(odfElement):
    """Return the text content of *odfElement* as one string.

    Text nodes contribute their ``data``.  Among element children,
    a line-break element becomes a newline, a tab element becomes a
    tab character, and an ``s`` element becomes as many spaces as its
    ``c`` attribute says (one space when the attribute is missing or
    empty).  Any other element is processed recursively.  Children
    that are neither text nor element nodes are ignored.
    """
    # Namespace part of the qualified names compared below.
    text_ns = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
    result = []

    for child in odfElement.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            result.append(child.data)
        elif child.nodeType == Node.ELEMENT_NODE:
            tagName = child.qname
            if tagName == (text_ns, u"line-break"):
                result.append("\n")
            elif tagName == (text_ns, u"tab"):
                result.append("\t")
            elif tagName == (text_ns, u"s"):
                c = child.getAttribute('c')
                spaceCount = int(c) if c else 1
                result.append(" " * spaceCount)
            else:
                result.append(extractText(child))
    return ''.join(result)
_emitSpaces(self, odfElement)
Creates a <text:s> element for the current spaceCount.
Definition teletype.py:101
_emitTextBuffer(self, odfElement)
Creates a Text Node whose contents are the current textBuffer.
Definition teletype.py:90
addTextToElement(self, odfElement, s)
Process an input string, inserting <text:tab> elements for '\t', <text:line-break> elements for '\n',...
Definition teletype.py:46
extractText(odfElement)
Extract text content from an Element, with whitespace represented properly.
Definition teletype.py:117
addTextToElement(odfElement, s)
Definition teletype.py:107