/
parse-wiki
executable file
·118 lines (96 loc) · 2.69 KB
/
parse-wiki
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env ruby
# Name of the wiki page holding the event list. Not referenced by the code
# visible in this file; kept for anything that may still use it.
WIKIPAGE = "Termine"
# URL fetched by LaborTermine#getWikiContent (MediaWiki export of the page).
WIKIURL = "http://www.das-labor.org:80/wiki/Spezial:Exportieren/Termine"
# Prefix put in front of a wiki page name to build an absolute link.
WIKIPREFIX = "http://www.das-labor.org/wiki/"
# Output file: "termine.xml" inside the directory given as first command-line
# argument. Without an argument the current directory is used; the former
# ARGV[0]+"/termine.xml" raised NoMethodError on nil in that case.
XMLFILE = File.join(ARGV[0] || ".", "termine.xml")
# Line patterns tried in order by LaborTermine#parseLine. Each one describes a
# bullet line ("* ...") carrying a date, a two-letter weekday, a time and the
# free text of the event; the two variants differ only in whether the date or
# the weekday comes first.
# NOTE(review): the (?:<NAME>...) groups are not Ruby named captures; they are
# presumably interpreted by NamedRegexp (namedregexp.rb) - confirm there.
WIKILINES = [
  # /\W+(?:<DATE>\d{2}.\d{2}.\d{2})\W+(?:<DAY>\w\w)\W+(?:<TEXT>.*)/,
  /^\s*\*\W+(?:<DAY>\d{2}).(?:<MONTH>\d{2}).(?:<YEAR>\d{2})\W+(?:<WDAY>\w\w)\W+(?:<HOUR>\d{1,2}):(?:<MIN>\d{2})[ ,;-]+(?:<TEXT>.*)/,
  /^\s*\*\W+(?:<WDAY>\w\w)\W+(?:<DAY>\d{2}).(?:<MONTH>\d{2}).(?:<YEAR>\d{2})\W+(?:<HOUR>\d{1,2}):(?:<MIN>\d{2})[ ,;-]+(?:<TEXT>.*)/
].freeze
#############################################################################
# Starts Here
require 'http-access2'
require 'rexml/document'
require 'htmlentities/lib/htmlentities.rb'
require 'namedregexp.rb'
# Downloads the wiki export of the event page and turns every line that
# matches one of the WIKILINES patterns into a hash of event fields.
#
# Depends on names provided elsewhere in this file: the constants WIKIURL,
# WIKIPREFIX and WIKILINES, and the HTTPAccess2, HTMLEntities, REXML and
# NamedRegexp libraries loaded by the top-level requires.
class LaborTermine
  # [[Page|Label]] - wiki link with an explicit label.
  PIPED_LINK = /\[\[\s*(.+?)\s*\|\s*(.+?)\]\]/
  # [[Page]] - wiki link whose label is the page name.
  PLAIN_LINK = /\[\[\s*(.*?)\s*\]\]/
  # [http://host/path Label] - external link; https is accepted as well.
  EXTERNAL_LINK = /^(.*?)\[(https?:\S+)\s+(.*)\](.*)$/

  # Array of event hashes, in the order they were found by parseWikiContent.
  attr_reader :termine

  def initialize
    @termine = []
  end

  # Fetches WIKIURL and returns the response body with HTML entities decoded.
  # The value of the HTTP_PROXY environment variable (nil when unset) is
  # handed to the HTTP client.
  def getWikiContent
    proxy = ENV['HTTP_PROXY']
    client = HTTPAccess2::Client.new(proxy)
    body = client.get(WIKIURL).content
    HTMLEntities.decode_entities(body)
  end

  # Matches +line+ against each WIKILINES pattern in order, after removing
  # every ''' (bold markup) from a copy of it.
  #
  # Returns the first non-nil result of NamedRegexp#match, or nil when no
  # pattern matches. The caller's string is not modified, so frozen strings
  # are accepted.
  def parseLine(line)
    plain = line.gsub("'''", "")
    WIKILINES.each do |re|
      m = NamedRegexp.new(re).match(plain)
      return m unless m.nil?
    end
    nil
  end

  # Parses +wikiXml+, walks the serialized /mediawiki/page/revision element
  # line by line and appends one hash per matching line to #termine.
  #
  # Hash keys are the lower-cased group names delivered by parseLine; a
  # "link" key is added when the text contains a recognised link.
  # Because the element is turned back into markup with to_s, field values
  # are in whatever form REXML serializes them.
  def parseWikiContent(wikiXml)
    doc = REXML::Document.new(wikiXml)
    revision = doc.elements["/mediawiki/page/revision"]
    # nil.to_s is "", so a document without a revision simply yields nothing.
    revision.to_s.each_line do |line|
      m = parseLine(line)
      next unless m

      aterm = {}
      # Each entry of the match result is a [name, value] pair.
      m.each { |pair| aterm[pair[0].downcase] = pair[1] }
      extractLink(aterm)
      @termine.push aterm
    end
  end

  # Main
  def run
    wikiXml = getWikiContent
    parseWikiContent(wikiXml)
  end

  private

  # Replaces the first link markup in aterm["text"] by its label and stores
  # the link target under aterm["link"]. Leaves +aterm+ untouched when the
  # text contains no recognised link.
  def extractLink(aterm)
    text = aterm["text"]
    if (m = PIPED_LINK.match(text))
      aterm["link"] = WIKIPREFIX + m[1].gsub(" ", "_")
      # Keep the text around the link; previously only the label survived and
      # everything before/after the link was dropped.
      label_in_place = m.pre_match + m[2] + m.post_match
      aterm["text"] = label_in_place.sub("[[", "").sub("]]", "")
    elsif (m = PLAIN_LINK.match(text))
      aterm["link"] = WIKIPREFIX + m[1].gsub(" ", "_")
      aterm["text"] = text.sub("[[", "").sub("]]", "")
    elsif (m = EXTERNAL_LINK.match(text))
      # The separating space is inserted unconditionally, as before.
      aterm["text"] = m[1] + " " + m[3] + m[4]
      aterm["link"] = m[2]
    end
  end
end
####
# MAIN
#
# Extends Array#to_h so that a flat list [k1, v1, k2, v2, ...] becomes a Hash,
# without breaking the core Array#to_h contract that Ruby >= 2.1 ships.
class Array
  # Keep a private handle on the core implementation before replacing it.
  # The guard prevents aliasing our own method when this file is loaded twice.
  if method_defined?(:to_h) && !private_method_defined?(:labor_core_to_h)
    alias_method :labor_core_to_h, :to_h
    private :labor_core_to_h
  end

  # Returns a Hash built from this array.
  #
  # * With a block, or when every element is a two-element array, the core
  #   Array#to_h is used (list of [key, value] pairs).
  # * Otherwise the elements are taken as alternating keys and values,
  #   i.e. Hash[*self]; an odd number of elements raises ArgumentError.
  #
  # On Rubies without a core Array#to_h only the flat form is available.
  def to_h(&block)
    if respond_to?(:labor_core_to_h, true)
      return labor_core_to_h(&block) if block
      return labor_core_to_h if all? { |e| e.is_a?(Array) && e.size == 2 }
    end
    Hash[*self]
  end
end
# Script body: fetch and parse the wiki page, then write every event to
# XMLFILE as a <termin> element inside a <termine> root. A one-line summary
# of each event is echoed to stdout as well.
labor = LaborTermine.new
labor.run

File.open(XMLFILE, "w") do |out|
  out.puts "<termine>"
  labor.termine.each do |termin|
    summary = "#{termin["day"]}.#{termin["month"]}.#{termin["year"]} #{termin["hour"]}:#{termin["min"]} #{termin["text"]}"
    puts summary

    out.puts "<termin>"
    # One child element per field; values are written as they are, without
    # any additional escaping.
    termin.each do |key, val|
      out.puts " <#{key}>#{val}</#{key}>"
    end
    out.puts "</termin>"
  end
  out.puts "</termine>"
end