Upload 51 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +1 -0
- Makefile +16 -0
- README +52 -0
- basic/city.cc +466 -0
- basic/city.h +90 -0
- basic/hard-ofstream.h +32 -0
- basic/indent.cc +3 -0
- basic/indent.h +18 -0
- basic/lisp.cc +129 -0
- basic/lisp.h +34 -0
- basic/logging.cc +145 -0
- basic/logging.h +122 -0
- basic/mem-tracker.cc +53 -0
- basic/mem-tracker.h +132 -0
- basic/mem.h +14 -0
- basic/multi-ostream.cc +61 -0
- basic/multi-ostream.h +67 -0
- basic/opt.cc +189 -0
- basic/opt.h +100 -0
- basic/pipe.h +46 -0
- basic/prob-utils.cc +75 -0
- basic/prob-utils.h +19 -0
- basic/stats.cc +1 -0
- basic/stats.h +71 -0
- basic/std.cc +111 -0
- basic/std.h +115 -0
- basic/stl-basic.cc +1 -0
- basic/stl-basic.h +113 -0
- basic/stl-utils.cc +1 -0
- basic/stl-utils.h +232 -0
- basic/str-str-db.cc +35 -0
- basic/str-str-db.h +19 -0
- basic/str.cc +91 -0
- basic/str.h +22 -0
- basic/strdb.cc +209 -0
- basic/strdb.h +101 -0
- basic/timer.cc +11 -0
- basic/timer.h +35 -0
- basic/union-set.cc +29 -0
- basic/union-set.h +22 -0
- cluster-viewer/LICENSE +22 -0
- cluster-viewer/README.md +26 -0
- cluster-viewer/build-viewer.sh +32 -0
- cluster-viewer/code/final.py +8 -0
- cluster-viewer/code/htmlrows.html +18 -0
- cluster-viewer/code/make_html.py +75 -0
- cluster-viewer/code/style.css +9 -0
- cluster-viewer/code/template.html +22 -0
- input.txt +3 -0
- output.txt +5 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
*.o
|
Makefile
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 1.2: need to make sure opt.o goes in the right order to get the right scope on the command-line arguments
# The object list is assigned with := so the shell commands below run once,
# at parse time, instead of on every expansion of $(files). patsubst only
# rewrites a trailing .cc, never a ".cc" in the middle of a file name.
# Use this for Linux
ifeq ($(shell uname),Linux)
files := $(patsubst %.cc,%.o,basic/logging.cc $(shell /bin/ls *.cc) $(shell /bin/ls basic/*.cc | grep -v logging.cc))
else
files := $(patsubst %.cc,%.o,basic/opt.cc $(shell /bin/ls *.cc) $(shell /bin/ls basic/*.cc | grep -v opt.cc))
endif
|
8 |
+
|
9 |
+
# Link all object files, in the order given by $(files), into the wcluster
# executable. $(CXX) defaults to g++ in GNU Make and can be overridden on the
# command line (e.g. make CXX=clang++).
wcluster: $(files)
	$(CXX) -Wall -g -std=c++0x -O3 -o $@ $(files) -lpthread
|
11 |
+
|
12 |
+
# Compile one C++ source file ($<) into the object file of the same name ($@).
# $(CXX) defaults to g++ in GNU Make and can be overridden on the command line.
%.o: %.cc
	$(CXX) -Wall -g -O3 -std=c++0x -o $@ -c $<
|
14 |
+
|
15 |
+
# Remove the executable and every object file. Declared phony so that a file
# named "clean" cannot mask the target; -f keeps rm from failing when some of
# the files do not exist (for example on a fresh checkout).
.PHONY: clean
clean:
	rm -f wcluster basic/*.o *.o
|
README
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Implementation of the Brown hierarchical word clustering algorithm.
|
2 |
+
Percy Liang
|
3 |
+
Release 1.3
|
4 |
+
2012.07.24
|
5 |
+
|
6 |
+
Input: a sequence of words separated by whitespace (see input.txt for an example).
|
7 |
+
Output: for each word type, its cluster (see output.txt for an example).
|
8 |
+
In particular, each line is:
|
9 |
+
<cluster represented as a bit string> <word> <number of times word occurs in input>
|
10 |
+
|
11 |
+
Runs in $O(N C^2)$, where $N$ is the number of word types and $C$
|
12 |
+
is the number of clusters.
|
13 |
+
|
14 |
+
References:
|
15 |
+
|
16 |
+
Brown, et al.: Class-Based n-gram Models of Natural Language
|
17 |
+
http://acl.ldc.upenn.edu/J/J92/J92-4003.pdf
|
18 |
+
|
19 |
+
Liang: Semi-supervised learning for natural language processing
|
20 |
+
http://cs.stanford.edu/~pliang/papers/meng-thesis.pdf
|
21 |
+
|
22 |
+
Compile:
|
23 |
+
|
24 |
+
make
|
25 |
+
|
26 |
+
Run:
|
27 |
+
|
28 |
+
# Clusters input.txt into 50 clusters:
|
29 |
+
./wcluster --text input.txt --c 50
|
30 |
+
# Output in input-c50-p1.out/paths
|
31 |
+
|
32 |
+
============================================================
|
33 |
+
Change Log
|
34 |
+
|
35 |
+
1.3: compatibility updates for newer versions of g++ (courtesy of Chris Dyer).
|
36 |
+
1.2: make compatible with MacOS (replaced timespec with timeval and changed order of linking).
|
37 |
+
1.1: Removed deprecated operators so it works with GCC 4.3.
|
38 |
+
|
39 |
+
============================================================
|
40 |
+
(C) Copyright 2007-2012, Percy Liang
|
41 |
+
|
42 |
+
http://cs.stanford.edu/~pliang
|
43 |
+
|
44 |
+
Permission is granted for anyone to copy, use, or modify these programs and
|
45 |
+
accompanying documents for purposes of research or education, provided this
|
46 |
+
copyright notice is retained, and note is made of any changes that have been
|
47 |
+
made.
|
48 |
+
|
49 |
+
These programs and documents are distributed without any warranty, express or
|
50 |
+
implied. As the programs were written for research purposes only, they have
|
51 |
+
not been tested to the degree that would be advisable in any important
|
52 |
+
application. All use of these programs is entirely at the user's own risk.
|
basic/city.cc
ADDED
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Copyright (c) 2011 Google, Inc.
|
2 |
+
//
|
3 |
+
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
4 |
+
// of this software and associated documentation files (the "Software"), to deal
|
5 |
+
// in the Software without restriction, including without limitation the rights
|
6 |
+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7 |
+
// copies of the Software, and to permit persons to whom the Software is
|
8 |
+
// furnished to do so, subject to the following conditions:
|
9 |
+
//
|
10 |
+
// The above copyright notice and this permission notice shall be included in
|
11 |
+
// all copies or substantial portions of the Software.
|
12 |
+
//
|
13 |
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14 |
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15 |
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16 |
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17 |
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18 |
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19 |
+
// THE SOFTWARE.
|
20 |
+
//
|
21 |
+
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
22 |
+
//
|
23 |
+
// This file provides CityHash64() and related functions.
|
24 |
+
//
|
25 |
+
// It's probably possible to create even faster hash functions by
|
26 |
+
// writing a program that systematically explores some of the space of
|
27 |
+
// possible hash functions, by using SIMD instructions, or by
|
28 |
+
// compromising on hash quality.
|
29 |
+
|
30 |
+
#include "city.h"
|
31 |
+
|
32 |
+
#include <algorithm>
|
33 |
+
#include <string.h> // for memcpy and memset
|
34 |
+
|
35 |
+
using namespace std;
|
36 |
+
|
37 |
+
// Copies sizeof(uint64) bytes starting at p into a uint64 with memcpy, so p
// does not have to be aligned. No byte-order adjustment is done here.
static uint64 UNALIGNED_LOAD64(const char *p) {
  uint64 loaded;
  memcpy(&loaded, p, sizeof(uint64));
  return loaded;
}
|
42 |
+
|
43 |
+
// Copies sizeof(uint32) bytes starting at p into a uint32 with memcpy, so p
// does not have to be aligned. No byte-order adjustment is done here.
static uint32 UNALIGNED_LOAD32(const char *p) {
  uint32 loaded;
  memcpy(&loaded, p, sizeof(uint32));
  return loaded;
}
|
48 |
+
|
49 |
+
#if !defined(WORDS_BIGENDIAN)
|
50 |
+
|
51 |
+
#define uint32_in_expected_order(x) (x)
|
52 |
+
#define uint64_in_expected_order(x) (x)
|
53 |
+
|
54 |
+
#else
|
55 |
+
|
56 |
+
#ifdef _MSC_VER
|
57 |
+
#include <stdlib.h>
|
58 |
+
#define bswap_32(x) _byteswap_ulong(x)
|
59 |
+
#define bswap_64(x) _byteswap_uint64(x)
|
60 |
+
|
61 |
+
#elif defined(__APPLE__)
|
62 |
+
// Mac OS X / Darwin features
|
63 |
+
#include <libkern/OSByteOrder.h>
|
64 |
+
#define bswap_32(x) OSSwapInt32(x)
|
65 |
+
#define bswap_64(x) OSSwapInt64(x)
|
66 |
+
|
67 |
+
#else
|
68 |
+
#include <byteswap.h>
|
69 |
+
#endif
|
70 |
+
|
71 |
+
#define uint32_in_expected_order(x) (bswap_32(x))
|
72 |
+
#define uint64_in_expected_order(x) (bswap_64(x))
|
73 |
+
|
74 |
+
#endif // WORDS_BIGENDIAN
|
75 |
+
|
76 |
+
#if !defined(LIKELY)
|
77 |
+
#if HAVE_BUILTIN_EXPECT
|
78 |
+
#define LIKELY(x) (__builtin_expect(!!(x), 1))
|
79 |
+
#else
|
80 |
+
#define LIKELY(x) (x)
|
81 |
+
#endif
|
82 |
+
#endif
|
83 |
+
|
84 |
+
// Loads 8 bytes from p and passes them through uint64_in_expected_order(),
// which is the identity unless WORDS_BIGENDIAN is defined (then a byte swap).
static uint64 Fetch64(const char *p) {
  const uint64 raw = UNALIGNED_LOAD64(p);
  return uint64_in_expected_order(raw);
}
|
87 |
+
|
88 |
+
// Loads 4 bytes from p and passes them through uint32_in_expected_order(),
// which is the identity unless WORDS_BIGENDIAN is defined (then a byte swap).
static uint32 Fetch32(const char *p) {
  const uint32 raw = UNALIGNED_LOAD32(p);
  return uint32_in_expected_order(raw);
}
|
91 |
+
|
92 |
+
// Some primes between 2^63 and 2^64 for various uses.
|
93 |
+
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
|
94 |
+
static const uint64 k1 = 0xb492b66fbe98f273ULL;
|
95 |
+
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
|
96 |
+
static const uint64 k3 = 0xc949d7c7509e6557ULL;
|
97 |
+
|
98 |
+
// Bitwise right rotate. Normally this will compile to a single
|
99 |
+
// instruction, especially if the shift is a manifest constant.
|
100 |
+
static uint64 Rotate(uint64 val, int shift) {
|
101 |
+
// Avoid shifting by 64: doing so yields an undefined result.
|
102 |
+
return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
|
103 |
+
}
|
104 |
+
|
105 |
+
// Equivalent to Rotate(), but requires the second arg to be non-zero.
|
106 |
+
// On x86-64, and probably others, it's possible for this to compile
|
107 |
+
// to a single instruction if both args are already in registers.
|
108 |
+
static uint64 RotateByAtLeast1(uint64 val, int shift) {
|
109 |
+
return (val >> shift) | (val << (64 - shift));
|
110 |
+
}
|
111 |
+
|
112 |
+
// Mixes the high bits of val into its low bits: XORs val with itself shifted
// right by 47.
static uint64 ShiftMix(uint64 val) {
  const uint64 high_bits = val >> 47;
  return val ^ high_bits;
}
|
115 |
+
|
116 |
+
// Hashes two 64-bit values down to one: packs (u, v) into a uint128 and
// reduces it with Hash128to64().
static uint64 HashLen16(uint64 u, uint64 v) {
  const uint128 packed(u, v);
  return Hash128to64(packed);
}
|
119 |
+
|
120 |
+
// Hashes a string of at most 16 bytes (CityHash64() only calls it for
// len <= 16). The length selects one of four cases: empty, 1-3 bytes,
// 4-8 bytes, and more than 8 bytes.
static uint64 HashLen0to16(const char *s, size_t len) {
  // Empty input hashes to a fixed constant.
  if (len == 0) {
    return k2;
  }

  // 1-3 bytes: combine the first, middle and last byte with the length.
  if (len < 4) {
    const uint8 first = s[0];
    const uint8 middle = s[len >> 1];
    const uint8 last = s[len - 1];
    const uint32 y =
        static_cast<uint32>(first) + (static_cast<uint32>(middle) << 8);
    const uint32 z = len + (static_cast<uint32>(last) << 2);
    return ShiftMix(y * k2 ^ z * k3) * k2;
  }

  // 4-8 bytes: two (possibly overlapping) 32-bit reads from both ends.
  if (len <= 8) {
    const uint64 head = Fetch32(s);
    return HashLen16(len + (head << 3), Fetch32(s + len - 4));
  }

  // More than 8 bytes: two (possibly overlapping) 64-bit reads from both ends.
  const uint64 head = Fetch64(s);
  const uint64 tail = Fetch64(s + len - 8);
  return HashLen16(head, RotateByAtLeast1(tail + len, len)) ^ tail;
}
|
140 |
+
|
141 |
+
// This probably works well for 16-byte strings as well, but it may be overkill
|
142 |
+
// in that case.
|
143 |
+
static uint64 HashLen17to32(const char *s, size_t len) {
|
144 |
+
uint64 a = Fetch64(s) * k1;
|
145 |
+
uint64 b = Fetch64(s + 8);
|
146 |
+
uint64 c = Fetch64(s + len - 8) * k2;
|
147 |
+
uint64 d = Fetch64(s + len - 16) * k0;
|
148 |
+
return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
|
149 |
+
a + Rotate(b ^ k3, 20) - c + len);
|
150 |
+
}
|
151 |
+
|
152 |
+
// Return a 16-byte hash for 48 bytes. Quick and dirty.
|
153 |
+
// Callers do best to use "random-looking" values for a and b.
|
154 |
+
static pair<uint64, uint64> WeakHashLen32WithSeeds(
|
155 |
+
uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
|
156 |
+
a += w;
|
157 |
+
b = Rotate(b + a + z, 21);
|
158 |
+
uint64 c = a;
|
159 |
+
a += x;
|
160 |
+
a += y;
|
161 |
+
b += Rotate(a, 44);
|
162 |
+
return make_pair(a + z, b + c);
|
163 |
+
}
|
164 |
+
|
165 |
+
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
|
166 |
+
static pair<uint64, uint64> WeakHashLen32WithSeeds(
|
167 |
+
const char* s, uint64 a, uint64 b) {
|
168 |
+
return WeakHashLen32WithSeeds(Fetch64(s),
|
169 |
+
Fetch64(s + 8),
|
170 |
+
Fetch64(s + 16),
|
171 |
+
Fetch64(s + 24),
|
172 |
+
a,
|
173 |
+
b);
|
174 |
+
}
|
175 |
+
|
176 |
+
// Return an 8-byte hash for 33 to 64 bytes.
|
177 |
+
static uint64 HashLen33to64(const char *s, size_t len) {
|
178 |
+
uint64 z = Fetch64(s + 24);
|
179 |
+
uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
|
180 |
+
uint64 b = Rotate(a + z, 52);
|
181 |
+
uint64 c = Rotate(a, 37);
|
182 |
+
a += Fetch64(s + 8);
|
183 |
+
c += Rotate(a, 7);
|
184 |
+
a += Fetch64(s + 16);
|
185 |
+
uint64 vf = a + z;
|
186 |
+
uint64 vs = b + Rotate(a, 31) + c;
|
187 |
+
a = Fetch64(s + 16) + Fetch64(s + len - 32);
|
188 |
+
z = Fetch64(s + len - 8);
|
189 |
+
b = Rotate(a + z, 52);
|
190 |
+
c = Rotate(a, 37);
|
191 |
+
a += Fetch64(s + len - 24);
|
192 |
+
c += Rotate(a, 7);
|
193 |
+
a += Fetch64(s + len - 16);
|
194 |
+
uint64 wf = a + z;
|
195 |
+
uint64 ws = b + Rotate(a, 31) + c;
|
196 |
+
uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
|
197 |
+
return ShiftMix(r * k0 + vs) * k2;
|
198 |
+
}
|
199 |
+
|
200 |
+
// Hash function for a byte array: returns a 64-bit hash of s[0..len-1].
// Inputs of up to 64 bytes are handed to a length-specific helper; longer
// inputs are processed in 64-byte chunks.
uint64 CityHash64(const char *s, size_t len) {
  if (len <= 16) {
    return HashLen0to16(s, len);
  }
  if (len <= 32) {
    return HashLen17to32(s, len);
  }
  if (len <= 64) {
    return HashLen33to64(s, len);
  }

  // For strings over 64 bytes we hash the end first, and then as we
  // loop we keep 56 bytes of state: v, w, x, y, and z.
  uint64 x = Fetch64(s + len - 40);
  uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
  uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
  x = x * k1 + Fetch64(s);

  // `remaining` starts at the largest multiple of 64 that is strictly below
  // len (at least 64 here, because len > 64) and drops by one chunk per pass.
  for (size_t remaining = (len - 1) & ~static_cast<size_t>(63);
       remaining != 0;
       remaining -= 64) {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
  }

  const uint64 u_part = HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z;
  const uint64 v_part = HashLen16(v.second, w.second) + x;
  return HashLen16(u_part, v_part);
}
|
237 |
+
|
238 |
+
// Hash function for a byte array with a 64-bit seed hashed into the result.
// Implemented as CityHash64WithSeeds() with k2 as the first seed and `seed`
// as the second.
uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
  const uint64 fixed_first_seed = k2;
  return CityHash64WithSeeds(s, len, fixed_first_seed, seed);
}
|
241 |
+
|
242 |
+
// Hash function for a byte array with two seeds hashed into the result:
// seed0 is subtracted from the unseeded CityHash64() value and the difference
// is combined with seed1 through HashLen16().
uint64 CityHash64WithSeeds(const char *s, size_t len,
                           uint64 seed0, uint64 seed1) {
  const uint64 unseeded = CityHash64(s, len);
  return HashLen16(unseeded - seed0, seed1);
}
|
246 |
+
|
247 |
+
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
|
248 |
+
// of any length representable in signed long. Based on City and Murmur.
|
249 |
+
static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
|
250 |
+
uint64 a = Uint128Low64(seed);
|
251 |
+
uint64 b = Uint128High64(seed);
|
252 |
+
uint64 c = 0;
|
253 |
+
uint64 d = 0;
|
254 |
+
signed long l = len - 16;
|
255 |
+
if (l <= 0) { // len <= 16
|
256 |
+
a = ShiftMix(a * k1) * k1;
|
257 |
+
c = b * k1 + HashLen0to16(s, len);
|
258 |
+
d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
|
259 |
+
} else { // len > 16
|
260 |
+
c = HashLen16(Fetch64(s + len - 8) + k1, a);
|
261 |
+
d = HashLen16(b + len, c + Fetch64(s + len - 16));
|
262 |
+
a += d;
|
263 |
+
do {
|
264 |
+
a ^= ShiftMix(Fetch64(s) * k1) * k1;
|
265 |
+
a *= k1;
|
266 |
+
b ^= a;
|
267 |
+
c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
|
268 |
+
c *= k1;
|
269 |
+
d ^= c;
|
270 |
+
s += 16;
|
271 |
+
l -= 16;
|
272 |
+
} while (l > 0);
|
273 |
+
}
|
274 |
+
a = HashLen16(a, c);
|
275 |
+
b = HashLen16(d, b);
|
276 |
+
return uint128(a ^ b, HashLen16(b, a));
|
277 |
+
}
|
278 |
+
|
279 |
+
// Hash function for a byte array with a 128-bit seed hashed into the result.
// Inputs shorter than 128 bytes are delegated to CityMurmur(); longer inputs
// are consumed 128 bytes per outer pass, followed by a tail step.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len < 128) {
    return CityMurmur(s, len, seed);
  }

  // We expect len >= 128 to be the common case. Keep 56 bytes of state:
  // v, w, x, y, and z.
  uint64 x = Uint128Low64(seed);
  uint64 y = Uint128High64(seed);
  uint64 z = len * k1;
  pair<uint64, uint64> v, w;
  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
  w.first = Rotate(y + z, 35) * k1 + x;
  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;

  // This is the same inner step as the loop in CityHash64(), applied to two
  // consecutive 64-byte chunks per outer pass.
  do {
    for (int chunk = 0; chunk < 2; ++chunk) {
      x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
      y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
      x ^= w.second;
      y += v.first + Fetch64(s + 40);
      z = Rotate(z + w.first, 33) * k1;
      v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
      w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
      std::swap(z, x);
      s += 64;
    }
    len -= 128;
  } while (LIKELY(len >= 128));

  x += Rotate(v.first + z, 49) * k0;
  z += Rotate(w.first, 37) * k0;

  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  // The chunks are taken back to front; the last one may start before s,
  // i.e. inside bytes the main loop has already consumed.
  size_t tail_done = 0;
  while (tail_done < len) {
    tail_done += 32;
    const char *tail = s + len - tail_done;
    y = Rotate(x + y, 42) * k0 + v.second;
    w.first += Fetch64(tail + 16);
    x = x * k0 + w.first;
    z += w.second + Fetch64(tail);
    w.second += v.first;
    v = WeakHashLen32WithSeeds(tail, v.first + z, v.second);
  }

  // At this point our 56 bytes of state should contain more than
  // enough information for a strong 128-bit hash. We use two
  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  x = HashLen16(x, v.first);
  y = HashLen16(y + z, w.first);
  const uint64 low = HashLen16(x + v.second, w.second) + y;
  const uint64 high = HashLen16(x + w.second, y + v.second);
  return uint128(low, high);
}
|
337 |
+
|
338 |
+
// Hash function for a byte array, returning a 128-bit hash. The seed passed
// to CityHash128WithSeed() is derived from the input itself and depends on
// which of three length ranges len falls into.
uint128 CityHash128(const char *s, size_t len) {
  // Fewer than 8 bytes: hash everything with a constant seed.
  if (len < 8) {
    return CityHash128WithSeed(s, len, uint128(k0, k1));
  }

  // 8 to 15 bytes: the whole input goes into the seed (two possibly
  // overlapping 64-bit reads), leaving nothing to hash.
  if (len < 16) {
    const uint128 seed(Fetch64(s) ^ (len * k0), Fetch64(s + len - 8) ^ k1);
    return CityHash128WithSeed(NULL, 0, seed);
  }

  // 16 bytes or more: the first 16 bytes form the seed, the rest is hashed.
  const uint128 seed(Fetch64(s) ^ k3, Fetch64(s + 8));
  return CityHash128WithSeed(s + 16, len - 16, seed);
}
|
353 |
+
|
354 |
+
#ifdef __SSE4_2__
|
355 |
+
#include <citycrc.h>
|
356 |
+
#include <nmmintrin.h>
|
357 |
+
|
358 |
+
// Requires len >= 240.
|
359 |
+
static void CityHashCrc256Long(const char *s, size_t len,
|
360 |
+
uint32 seed, uint64 *result) {
|
361 |
+
uint64 a = Fetch64(s + 56) + k0;
|
362 |
+
uint64 b = Fetch64(s + 96) + k0;
|
363 |
+
uint64 c = result[0] = HashLen16(b, len);
|
364 |
+
uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
|
365 |
+
uint64 e = Fetch64(s + 184) + seed;
|
366 |
+
uint64 f = seed;
|
367 |
+
uint64 g = 0;
|
368 |
+
uint64 h = 0;
|
369 |
+
uint64 i = 0;
|
370 |
+
uint64 j = 0;
|
371 |
+
uint64 t = c + d;
|
372 |
+
|
373 |
+
// 240 bytes of input per iter.
|
374 |
+
size_t iters = len / 240;
|
375 |
+
len -= iters * 240;
|
376 |
+
do {
|
377 |
+
#define CHUNK(multiplier, z) \
|
378 |
+
{ \
|
379 |
+
uint64 old_a = a; \
|
380 |
+
a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \
|
381 |
+
b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \
|
382 |
+
c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \
|
383 |
+
d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \
|
384 |
+
e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \
|
385 |
+
t = old_a; \
|
386 |
+
} \
|
387 |
+
f = _mm_crc32_u64(f, a); \
|
388 |
+
g = _mm_crc32_u64(g, b); \
|
389 |
+
h = _mm_crc32_u64(h, c); \
|
390 |
+
i = _mm_crc32_u64(i, d); \
|
391 |
+
j = _mm_crc32_u64(j, e); \
|
392 |
+
s += 40
|
393 |
+
|
394 |
+
CHUNK(1, 1); CHUNK(k0, 0);
|
395 |
+
CHUNK(1, 1); CHUNK(k0, 0);
|
396 |
+
CHUNK(1, 1); CHUNK(k0, 0);
|
397 |
+
} while (--iters > 0);
|
398 |
+
|
399 |
+
while (len >= 40) {
|
400 |
+
CHUNK(k0, 0);
|
401 |
+
len -= 40;
|
402 |
+
}
|
403 |
+
if (len > 0) {
|
404 |
+
s = s + len - 40;
|
405 |
+
CHUNK(k0, 0);
|
406 |
+
}
|
407 |
+
j += i << 32;
|
408 |
+
a = HashLen16(a, j);
|
409 |
+
h += g << 32;
|
410 |
+
b += h;
|
411 |
+
c = HashLen16(c, f) + i;
|
412 |
+
d = HashLen16(d, e + result[0]);
|
413 |
+
j += e;
|
414 |
+
i += HashLen16(h, t);
|
415 |
+
e = HashLen16(a, d) + j;
|
416 |
+
f = HashLen16(b, c) + a;
|
417 |
+
g = HashLen16(j, i) + c;
|
418 |
+
result[0] = e + f + g + h;
|
419 |
+
a = ShiftMix((a + g) * k0) * k0 + b;
|
420 |
+
result[1] += a + result[0];
|
421 |
+
a = ShiftMix(a * k0) * k0 + c;
|
422 |
+
result[2] = a + result[1];
|
423 |
+
a = ShiftMix((a + e) * k0) * k0;
|
424 |
+
result[3] = a + result[2];
|
425 |
+
}
|
426 |
+
|
427 |
+
// Requires len < 240.
|
428 |
+
static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
|
429 |
+
char buf[240];
|
430 |
+
memcpy(buf, s, len);
|
431 |
+
memset(buf + len, 0, 240 - len);
|
432 |
+
CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);
|
433 |
+
}
|
434 |
+
|
435 |
+
void CityHashCrc256(const char *s, size_t len, uint64 *result) {
|
436 |
+
if (LIKELY(len >= 240)) {
|
437 |
+
CityHashCrc256Long(s, len, 0, result);
|
438 |
+
} else {
|
439 |
+
CityHashCrc256Short(s, len, result);
|
440 |
+
}
|
441 |
+
}
|
442 |
+
|
443 |
+
// 128-bit seeded hash. Inputs of up to 900 bytes use CityHash128WithSeed();
// longer inputs are hashed with CityHashCrc256() and the four result words
// are folded, together with the seed, into 128 bits.
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len <= 900) {
    return CityHash128WithSeed(s, len, seed);
  }

  uint64 digest[4];
  CityHashCrc256(s, len, digest);
  const uint64 u = Uint128High64(seed) + digest[0];
  const uint64 v = Uint128Low64(seed) + digest[1];
  const uint64 low = HashLen16(u, v + digest[2]);
  const uint64 high = HashLen16(Rotate(v, 32), u * k0 + digest[3]);
  return uint128(low, high);
}
|
455 |
+
|
456 |
+
// 128-bit hash. Inputs of up to 900 bytes use CityHash128(); longer inputs
// are hashed with CityHashCrc256() and the last two of its four result words
// are returned.
uint128 CityHashCrc128(const char *s, size_t len) {
  if (len <= 900) {
    return CityHash128(s, len);
  }

  uint64 digest[4];
  CityHashCrc256(s, len, digest);
  return uint128(digest[2], digest[3]);
}
|
465 |
+
|
466 |
+
#endif
|
basic/city.h
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Copyright (c) 2011 Google, Inc.
|
2 |
+
//
|
3 |
+
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
4 |
+
// of this software and associated documentation files (the "Software"), to deal
|
5 |
+
// in the Software without restriction, including without limitation the rights
|
6 |
+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7 |
+
// copies of the Software, and to permit persons to whom the Software is
|
8 |
+
// furnished to do so, subject to the following conditions:
|
9 |
+
//
|
10 |
+
// The above copyright notice and this permission notice shall be included in
|
11 |
+
// all copies or substantial portions of the Software.
|
12 |
+
//
|
13 |
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14 |
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15 |
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16 |
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17 |
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18 |
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19 |
+
// THE SOFTWARE.
|
20 |
+
//
|
21 |
+
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
22 |
+
//
|
23 |
+
// This file provides a few functions for hashing strings. On x86-64
|
24 |
+
// hardware in 2011, CityHash64() is faster than other high-quality
|
25 |
+
// hash functions, such as Murmur. This is largely due to higher
|
26 |
+
// instruction-level parallelism. CityHash64() and CityHash128() also perform
|
27 |
+
// well on hash-quality tests.
|
28 |
+
//
|
29 |
+
// CityHash128() is optimized for relatively long strings and returns
|
30 |
+
// a 128-bit hash. For strings more than about 2000 bytes it can be
|
31 |
+
// faster than CityHash64().
|
32 |
+
//
|
33 |
+
// Functions in the CityHash family are not suitable for cryptography.
|
34 |
+
//
|
35 |
+
// WARNING: This code has not been tested on big-endian platforms!
|
36 |
+
// It is known to work well on little-endian platforms that have a small penalty
|
37 |
+
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
|
38 |
+
//
|
39 |
+
// By the way, for some hash functions, given strings a and b, the hash
|
40 |
+
// of a+b is easily derived from the hashes of a and b. This property
|
41 |
+
// doesn't hold for any hash functions in this file.
|
42 |
+
|
43 |
+
#ifndef CITY_HASH_H_
|
44 |
+
#define CITY_HASH_H_
|
45 |
+
|
46 |
+
#include <stdlib.h> // for size_t.
|
47 |
+
#include <stdint.h>
|
48 |
+
#include <utility>
|
49 |
+
|
50 |
+
typedef uint8_t uint8;
|
51 |
+
typedef uint32_t uint32;
|
52 |
+
typedef uint64_t uint64;
|
53 |
+
typedef std::pair<uint64, uint64> uint128;
|
54 |
+
|
55 |
+
// Returns the low 64 bits of a uint128, stored as the pair's first member.
inline uint64 Uint128Low64(const uint128& x) {
  return x.first;
}
|
56 |
+
// Returns the high 64 bits of a uint128, stored as the pair's second member.
inline uint64 Uint128High64(const uint128& x) {
  return x.second;
}
|
57 |
+
|
58 |
+
// Hash function for a byte array.
|
59 |
+
uint64 CityHash64(const char *buf, size_t len);
|
60 |
+
|
61 |
+
// Hash function for a byte array. For convenience, a 64-bit seed is also
|
62 |
+
// hashed into the result.
|
63 |
+
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
|
64 |
+
|
65 |
+
// Hash function for a byte array. For convenience, two seeds are also
|
66 |
+
// hashed into the result.
|
67 |
+
uint64 CityHash64WithSeeds(const char *buf, size_t len,
|
68 |
+
uint64 seed0, uint64 seed1);
|
69 |
+
|
70 |
+
// Hash function for a byte array.
|
71 |
+
uint128 CityHash128(const char *s, size_t len);
|
72 |
+
|
73 |
+
// Hash function for a byte array. For convenience, a 128-bit seed is also
|
74 |
+
// hashed into the result.
|
75 |
+
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
|
76 |
+
|
77 |
+
// Hash 128 input bits down to 64 bits of output.
|
78 |
+
// This is intended to be a reasonably good hash function.
|
79 |
+
inline uint64 Hash128to64(const uint128& x) {
|
80 |
+
// Murmur-inspired hashing.
|
81 |
+
const uint64 kMul = 0x9ddfea08eb382d69ULL;
|
82 |
+
uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
|
83 |
+
a ^= (a >> 47);
|
84 |
+
uint64 b = (Uint128High64(x) ^ a) * kMul;
|
85 |
+
b ^= (b >> 47);
|
86 |
+
b *= kMul;
|
87 |
+
return b;
|
88 |
+
}
|
89 |
+
|
90 |
+
#endif // CITY_HASH_H_
|
basic/hard-ofstream.h
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __HARD_OFSTREAM_H__
|
2 |
+
#define __HARD_OFSTREAM_H__
|
3 |
+
|
4 |
+
// On AFS, flushing a file writes it to the local disk but not AFS.
|
5 |
+
// Hard flushing ensures that the file will be written, by closing
|
6 |
+
// and re-opening the file.
|
7 |
+
|
8 |
+
#include <fstream>
|
9 |
+
#include <string>
|
10 |
+
|
11 |
+
using namespace std;
|
12 |
+
|
13 |
+
// An ofstream that can be "hard flushed": closed and re-opened in append
// mode, so that everything written so far is committed by a real close
// rather than by a plain flush.
class hard_ofstream : public ofstream {
public:
  hard_ofstream() : mode(ofstream::trunc) { }
  hard_ofstream(const char *file, ofstream::openmode mode = ofstream::trunc)
      : mode(ofstream::trunc) {
    open(file, mode);
  }

  // Opens `file` with `mode` and remembers both for later hard_flush() calls.
  // Note: this hides (does not override) ofstream::open.
  void open(const char *file, ofstream::openmode mode = ofstream::trunc) {
    ofstream::open(file, mode);
    this->file = file;
    this->mode = mode;
  }

  // Closes the file and re-opens it for appending, so later writes continue
  // after the data already written.
  void hard_flush() {
    close();
    // Carry over the binary flag of the original open mode; re-opening with
    // app alone would silently switch a binary stream to text mode. The base
    // open is called directly so the remembered name and mode stay untouched.
    ofstream::open(file.c_str(), ofstream::app | (mode & ofstream::binary));
  }

private:
  string file;              // name passed to the most recent open()
  ofstream::openmode mode;  // mode passed to the most recent open()
};
|
31 |
+
|
32 |
+
#endif
|
basic/indent.cc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
#include "indent.h"
|
2 |
+
|
3 |
+
#include "opt.h"
|
basic/indent.h
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __INDENT_H__
|
2 |
+
#define __INDENT_H__
|
3 |
+
|
4 |
+
#include <iostream>
|
5 |
+
|
6 |
+
using namespace std;
|
7 |
+
|
8 |
+
// Value that can be streamed to an ostream to emit indentation; `level` is
// the number of indentation units written by operator<<.
struct Indent {
  int level;

  Indent(int level) : level(level) { }
};
|
12 |
+
|
13 |
+
// Writes one " " to `out` per indentation level of `ind` (nothing when the
// level is zero or negative) and returns `out` so calls can be chained.
inline ostream &operator<<(ostream &out, const Indent &ind) {
  for(int remaining = ind.level; remaining > 0; --remaining) {
    out << " ";
  }
  return out;
}
|
17 |
+
|
18 |
+
#endif
|
basic/lisp.cc
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "lisp.h"
|
2 |
+
#include "std.h"
|
3 |
+
#include "indent.h"
|
4 |
+
|
5 |
+
// Recursively free every descendant of this node (but not the node itself).
void LispNode::destroy() {
  for(size_t c = 0; c < children.size(); c++) {
    children[c]->destroy();
    delete children[c];
  }
  // Drop the now-dangling pointers so that a second destroy() is harmless
  // instead of a double free.
  children.clear();
}
|
11 |
+
|
12 |
+
// Print this subtree to cout, one node per line, indented by depth.
// `ind` is the indent level of this node; children are printed at ind+1.
// A node with an empty value is shown as "(empty)".
void LispNode::print(intIndex ind) const {
  const string label = value.empty() ? "(empty)" : value;
  cout << Indent(ind) << label << endl;
  for(size_t c = 0; c < children.size(); c++)
    children[c]->print(ind+1);
}
|
17 |
+
|
18 |
+
////////////////////////////////////////////////////////////
|
19 |
+
|
20 |
+
// Free the whole tree.  `root` stays NULL until read() succeeds, so a tree
// that was never read must not be dereferenced here.
LispTree::~LispTree() {
  if(root == NULL) return;
  root->destroy();
  delete root;
  root = NULL;
}
|
24 |
+
|
25 |
+
// True if `c` is one of the four bracket characters ( ) [ ].
bool is_paren(char c) {
  switch(c) {
    case '(': case ')': case '[': case ']':
      return true;
    default:
      return false;
  }
}

// True if `s` is exactly one bracket character.
bool is_paren(std::string s) {
  return s.size() == 1 && is_paren(s[0]);
}

// True if `s` is exactly "(" or "[".
bool is_left_paren(std::string s) {
  if(s.size() != 1) return false;
  return s[0] == '(' || s[0] == '[';
}

// True if `s` is exactly ")" or "]".
bool is_right_paren(std::string s) {
  if(s.size() != 1) return false;
  return s[0] == ')' || s[0] == ']';
}

// Closing bracket that pairs with the opening bracket `c`,
// or the empty string if `c` is not an opening bracket.
std::string matching_right_paren(char c) {
  switch(c) {
    case '(': return ")";
    case '[': return "]";
    default:  return "";
  }
}
|
42 |
+
|
43 |
+
// Consume leading whitespace from `in` and return the first non-space
// character WITHOUT consuming it.  Returns char(EOF) at end of input.
char skip_space(std::istream &in) {
  while(true) {
    // Keep peek()'s int result: it is either EOF or an unsigned-char value,
    // which are the only arguments isspace() is defined for.  Narrowing to
    // char first would pass negative values for bytes >= 0x80.
    const int next = in.peek();
    if(next == EOF || !isspace(next)) return (char)next;
    in.get();
  }
}

// Comments start with # and end with the line.
// There must be a space before the #.
// Skips any mix of whitespace and comments and returns the first
// significant character without consuming it (char(EOF) at end of input).
char skip_comments(std::istream &in) {
  while(true) {
    const char c = skip_space(in);
    if(c != '#') return c;
    // Discard the rest of the comment line.  Also stop at EOF: a comment on
    // an unterminated last line would otherwise loop forever.
    for(int next = in.peek(); next != '\n' && next != EOF; next = in.peek())
      in.get();
  }
}
|
65 |
+
|
66 |
+
// Read the next token from `in` into `s`.
// A token is either a single bracket character or a maximal run of
// characters containing no whitespace and no bracket.
// Returns false when the input is exhausted and no token was read.
bool LispTree::read_token(istream &in, string &s) {
  const char first = skip_comments(in);

  if(is_paren(first)) {
    s = (char)in.get();
    return true;
  }

  s = "";
  while(true) {
    // Compare peek()'s int result with EOF directly; a char never equals
    // EOF on platforms where char is unsigned.
    const int next = in.peek();
    // An atom that ends exactly at end-of-file is still a token.
    if(next == EOF) return !s.empty();
    if(isspace(next) || is_paren((char)next)) break;
    s += (char)in.get();
  }

  return true;
}
|
84 |
+
|
85 |
+
// Build the node that starts at tokens[i] and advance `i` past it.
//   atom         -> leaf whose value is the atom
//   ( head ... ) -> node whose value is `head`, followed by its children
//   [ ... ]      -> node with an empty value, followed by its children
// Malformed input (missing tokens, mismatched brackets) trips an assert.
LispNode *LispTree::read_node(const vector<string> &tokens, intIndex &i) {
  LispNode *node = new LispNode();
  assert(i < len(tokens));

  const string first = tokens[i++];

  if(!is_left_paren(first)) {
    if(is_right_paren(first))
      assert(false);  // A closing bracket cannot start a node.
    else
      node->value = first;
    return node;
  }

  const char opener = first[0];

  // Only round brackets carry a head token.
  if(opener == '(') {
    assert(i < len(tokens) && !is_paren(tokens[i]));
    node->value = tokens[i++];
  }

  // Everything up to the closing bracket is a child.
  while(i < len(tokens) && !is_right_paren(tokens[i]))
    node->children.push_back(read_node(tokens, i));

  assert(i < len(tokens));
  const string closer = tokens[i++];
  assert(closer == matching_right_paren(opener));

  return node;
}
|
113 |
+
|
114 |
+
void LispTree::read(const char *file) {
|
115 |
+
ifstream in(file);
|
116 |
+
vector<string> tokens;
|
117 |
+
string token;
|
118 |
+
while(read_token(in, token)) {
|
119 |
+
tokens.push_back(token);
|
120 |
+
}
|
121 |
+
intIndex i = 0;
|
122 |
+
root = read_node(tokens, i);
|
123 |
+
assert(i == len(tokens));
|
124 |
+
}
|
125 |
+
|
126 |
+
void LispTree::print() const {
|
127 |
+
assert(root);
|
128 |
+
root->print(0);
|
129 |
+
}
|
basic/lisp.h
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __LISP_H__
|
2 |
+
#define __LISP_H__
|
3 |
+
|
4 |
+
#include <vector>
|
5 |
+
#include <string>
|
6 |
+
#include "std.h"
|
7 |
+
|
8 |
+
using namespace std;
|
9 |
+
|
10 |
+
////////////////////////////////////////////////////////////
|
11 |
+
|
12 |
+
struct LispNode {
|
13 |
+
void destroy();
|
14 |
+
void print(intIndex ind) const;
|
15 |
+
|
16 |
+
string value;
|
17 |
+
vector<LispNode *> children;
|
18 |
+
};
|
19 |
+
|
20 |
+
////////////////////////////////////////////////////////////
|
21 |
+
|
22 |
+
struct LispTree {
|
23 |
+
LispTree() : root(NULL) { }
|
24 |
+
~LispTree();
|
25 |
+
|
26 |
+
bool read_token(istream &in, string &s);
|
27 |
+
LispNode *read_node(const vector<string> &tokens, intIndex &i);
|
28 |
+
void read(const char *file);
|
29 |
+
void print() const;
|
30 |
+
|
31 |
+
LispNode *root;
|
32 |
+
};
|
33 |
+
|
34 |
+
#endif
|
basic/logging.cc
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "logging.h"
|
2 |
+
#include "opt.h"
|
3 |
+
#include "mem.h"
|
4 |
+
|
5 |
+
// The logging output has a tree structure, where each node is a
|
6 |
+
// line of output, and the depth of a node is its indent level.
|
7 |
+
// A run is the sequence of children of some node.
|
8 |
+
// A subset of the lines in the run will get printed.
|
9 |
+
|
10 |
+
////////////////////////////////////////////////////////////
|
11 |
+
|
12 |
+
// Reset all counters of this run and (re)start its timer.
void Run::init() {
  print_all_lines = false;
  next_line_to_print = 0;
  num_lines_printed = 0;
  num_lines = 0;
  timer.start();
}

// Close this run and stop its timer.
void Run::finish() {
  timer.stop();
  // Make it clear that this run is not printed: print() compares against
  // num_lines, which is never -1.  Otherwise, logss might think its parent
  // was printed when it really wasn't.
  next_line_to_print = -1;
}
|
27 |
+
|
28 |
+
bool Run::new_line() {
|
29 |
+
bool p = print();
|
30 |
+
num_lines++;
|
31 |
+
if(!p) return false;
|
32 |
+
|
33 |
+
// We're going to print this line. Now decide next line to print.
|
34 |
+
int ms_per_line = log_info.ms_per_line;
|
35 |
+
if(num_lines <= 2 || // Print first few lines anyway.
|
36 |
+
ms_per_line == 0 || // Print everything.
|
37 |
+
print_all_lines) // Print every line in this run.
|
38 |
+
next_line_to_print++;
|
39 |
+
else {
|
40 |
+
timer.stop();
|
41 |
+
if(timer.ms == 0) // No time has elapsed.
|
42 |
+
next_line_to_print *= 2; // Exponentially increase time between lines.
|
43 |
+
else
|
44 |
+
next_line_to_print += max(int((double)num_lines * ms_per_line / timer.ms), 1);
|
45 |
+
}
|
46 |
+
|
47 |
+
num_lines_printed++;
|
48 |
+
return true;
|
49 |
+
}
|
50 |
+
|
51 |
+
////////////////////////////////////////////////////////////
|
52 |
+
// Global information about logging.
|
53 |
+
|
54 |
+
// Set default parameters and start the program-wide timer.
LogInfo::LogInfo()
  : max_ind_level(3),
    ms_per_line(0),  // 0 = no throttling; 1000 would be one line per second.
    ind_level(0),
    buf("") {
  runs.resize(128);  // One Run slot per indent level.
  timer.start();
}

// Flush any pending log output.
LogInfo::~LogInfo() {
  out.flush();
}
|
68 |
+
|
69 |
+
// Open the log destination: `log_file` if one was given, otherwise
// standard output (via /dev/stdout).
void LogInfo::init() {
  const bool to_stdout = log_file.empty();
  if(!to_stdout)
    cout << "Logging to " << log_file << endl;
  out.open(to_stdout ? "/dev/stdout" : log_file.c_str());
}
|
77 |
+
|
78 |
+
LogInfo log_info;
|
79 |
+
|
80 |
+
////////////////////////////////////////////////////////////
|
81 |
+
// LogTracker:: For tracking functions or blocks.
|
82 |
+
|
83 |
+
void LogTracker::begin(bool print_all_lines) {
|
84 |
+
if(_ind_within) {
|
85 |
+
if(log_info.this_run().print()) {
|
86 |
+
const string &s = descrip.str();
|
87 |
+
|
88 |
+
_logs(name);
|
89 |
+
if(s.size() > 0 && name[0])
|
90 |
+
lout << ": ";
|
91 |
+
lout << s;
|
92 |
+
|
93 |
+
lout.flush();
|
94 |
+
log_info.buf = " {\n"; // Open the block.
|
95 |
+
|
96 |
+
log_info.child_run().init();
|
97 |
+
log_info.child_run().print_all_lines = print_all_lines;
|
98 |
+
}
|
99 |
+
else {
|
100 |
+
log_info.max_ind_level = -log_info.max_ind_level; // Prevent children from outputting.
|
101 |
+
output_stopped = true;
|
102 |
+
}
|
103 |
+
}
|
104 |
+
|
105 |
+
log_info.ind_level++;
|
106 |
+
}
|
107 |
+
|
108 |
+
// Close the tracked block: return to the parent indent level, report
// omitted lines, close the brace if one was opened, and print timings.
LogTracker::~LogTracker() {
  --log_info.ind_level;

  if(output_stopped)
    log_info.max_ind_level = -log_info.max_ind_level;  // Restore indent level.

  if(!_ind_within) return;
  if(!log_info.this_run().new_line()) return;

  // Finish up child level.
  ++log_info.ind_level;
  const int omitted = log_info.this_run().num_omitted();
  if(omitted > 0)
    _logs("... " << omitted << " lines omitted ...\n");
  --log_info.ind_level;
  log_info.child_run().finish();

  if(!log_info.buf[0])
    _logs("}");         // Something indented was printed: close the block.
  else
    log_info.buf = "";  // Nothing was printed (buf never emptied): pretend
                        // we didn't open the block.

  // Print time
  Timer &child_timer = log_info.child_run().timer;
  lout << " [" << child_timer;
  if(log_info.ind_level > 0) {
    Timer &own_timer = log_info.this_run().timer;
    own_timer.stop();
    lout << ", cumulative " << own_timer;
  }
  lout << "]\n";
}
|
141 |
+
|
142 |
+
// Options for logging.
|
143 |
+
int _log_info_max_ind_level = opt_define_int_wrap("max-ind-level", &log_info.max_ind_level, log_info.max_ind_level, "Maximum indent level for logging", false);
|
144 |
+
int _log_info_ms_per_line = opt_define_int_wrap("ms-per-line", &log_info.ms_per_line, log_info.ms_per_line, "Print a line out every this many milliseconds", false);
|
145 |
+
string _log_info_log_file = opt_define_string_wrap("log", &log_info.log_file, log_info.log_file, "File to write log to (\"\" for stdout)", false);
|
basic/logging.h
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __LOGGING_H__
|
2 |
+
#define __LOGGING_H__
|
3 |
+
|
4 |
+
#include "std.h"
|
5 |
+
#include "mem.h"
|
6 |
+
#include "timer.h"
|
7 |
+
#include "indent.h"
|
8 |
+
|
9 |
+
////////////////////////////////////////////////////////////
|
10 |
+
|
11 |
+
// State associated with a run.
|
12 |
+
struct Run {
|
13 |
+
Run() { init(); }
|
14 |
+
bool print() const { return num_lines == next_line_to_print; }
|
15 |
+
|
16 |
+
int num_omitted() { return num_lines - num_lines_printed; }
|
17 |
+
bool new_line();
|
18 |
+
|
19 |
+
void init();
|
20 |
+
void finish();
|
21 |
+
|
22 |
+
int num_lines; // Number of lines that we've gone through so far in this run.
|
23 |
+
int num_lines_printed; // Number of lines actually printed.
|
24 |
+
int next_line_to_print; // Next line to be printed (lines are 0-based).
|
25 |
+
Timer timer; // Keeps track of time spent on this run.
|
26 |
+
bool print_all_lines; // Whether or not to force the printing of each line.
|
27 |
+
};
|
28 |
+
|
29 |
+
////////////////////////////////////////////////////////////
|
30 |
+
// Global information about logging.
|
31 |
+
|
32 |
+
struct LogInfo {
|
33 |
+
LogInfo();
|
34 |
+
~LogInfo();
|
35 |
+
|
36 |
+
void init();
|
37 |
+
void hard_flush() { out.flush(); }
|
38 |
+
|
39 |
+
Run &parent_run() { return runs[ind_level-1]; }
|
40 |
+
Run &this_run() { return runs[ind_level]; }
|
41 |
+
Run &child_run() { return runs[ind_level+1]; }
|
42 |
+
|
43 |
+
// Parameters.
|
44 |
+
int max_ind_level; // Maximum indent level.
|
45 |
+
int ms_per_line; // Number of milliseconds between consecutive lines of output.
|
46 |
+
string log_file;
|
47 |
+
|
48 |
+
// State.
|
49 |
+
ofstream out;
|
50 |
+
int ind_level; // Current indent level.
|
51 |
+
const char *buf; // The buffer to be flushed out the next time _logs is called.
|
52 |
+
vector<Run> runs; // Indent level -> state
|
53 |
+
Timer timer; // Timer that starts at the beginning of the program
|
54 |
+
};
|
55 |
+
|
56 |
+
extern LogInfo log_info;
|
57 |
+
|
58 |
+
////////////////////////////////////////////////////////////
|
59 |
+
|
60 |
+
// The stream all logging goes to.
#define lout (log_info.out)

// Debugging aid: log the current source location.
#define here lout << "HERE " << __FILE__ << ':' << __LINE__ << endl

// Whether the current / parent indent level is within the printing limit.
#define _ind_within (log_info.ind_level <= log_info.max_ind_level)
#define _parent_ind_within (log_info.ind_level-1 <= log_info.max_ind_level)

// Unconditionally write x at the current indent, preceded by any pending
// block-open text in log_info.buf (which is then cleared).
#define _logs(x) \
  do { \
    lout << log_info.buf << Indent(log_info.ind_level) << x; \
    log_info.buf = ""; \
  } while(0)

// Log x as one line if the indent level allows it and the current run
// decides this line is to be printed.
#define logs(x) \
  do { \
    if(_ind_within && log_info.this_run().new_line()) \
      _logs(x << endl); \
  } while(0)

// Output something if parent outputted something.
// Subtle note: parent must have been a track, not logs, so its run
// information has not been updated yet until it closes.
// Therefore, calling print() on it is valid.
#define logss(x) \
  do { \
    if(_parent_ind_within && log_info.parent_run().print()) { \
      log_info.this_run().new_line(); \
      _logs(x << endl); \
    } \
  } while(0)

// Log x as one line, bypassing all filtering.
#define LOGS(x) _logs(x << endl)
|
85 |
+
|
86 |
+
////////////////////////////////////////////////////////////
|
87 |
+
// For tracking functions or blocks.
|
88 |
+
struct LogTracker {
|
89 |
+
LogTracker(const char *name) : b(true), output_stopped(false), name(name) { }
|
90 |
+
void begin(bool print_all_lines);
|
91 |
+
~LogTracker();
|
92 |
+
|
93 |
+
bool b; // Trick used in track_block to execute the for loop exactly once.
|
94 |
+
bool output_stopped;
|
95 |
+
const char *name;
|
96 |
+
ostringstream descrip;
|
97 |
+
};
|
98 |
+
|
99 |
+
// Track the rest of the enclosing scope as a logged block called `name`
// with description `x`; `all` forces every line inside to be printed.
// The description is only formatted when the header will be printed.
#define track(name, x, all) \
  LogTracker _lt(name); \
  (_ind_within && log_info.this_run().print() && _lt.descrip << x), _lt.begin(all)

// Same as track, but for the single statement or braced block that
// follows.  The for loop runs exactly once (see LogTracker::b).
#define track_block(name, x, all) \
  for(LogTracker _lt(name); \
      _lt.b && ((_ind_within && log_info.this_run().print() && _lt.descrip << x), _lt.begin(all), true); \
      _lt.b = false)

// Loops whose every iteration is a tracked block described as "i/n".
#define track_foridx(i, n, s, all) \
  foridx(i, n) track_block(s, i << '/' << n, all)
#define track_forvec(i, tx, x, vec, s, all) \
  forvec(i, tx, x, vec) track_block(s, i << '/' << len(vec), all)

// Open the log and the top-level "main" block.  Expects argc and argv to
// be in scope.
#define init_log \
  log_info.init(); \
  track("main", to_vector(argv, argc), true); \
  logs(now() << " on " << hostname() << " (" << cpu_speed_mhz() << "MHz)");

// Stream fragment reporting elapsed time and memory usage.
#define prog_status \
  "PROG_STATUS: " << \
  "time = " << log_info.timer.stop() << \
  ", memory = " << Mem(mem_usage()*1024)
|
121 |
+
|
122 |
+
#endif
|
basic/mem-tracker.cc
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "mem-tracker.h"
|
2 |
+
#include "mem.h"
|
3 |
+
|
4 |
+
/*
|
5 |
+
* Currently, memory tracking is not accurate.
|
6 |
+
* Always underestimates.
|
7 |
+
*/
|
8 |
+
|
9 |
+
////////////////////////////////////////////////////////////
|
10 |
+
|
11 |
+
long MemTracker::compute_mem_usage(const MemRecord &r) {
|
12 |
+
switch(r.type) {
|
13 |
+
list_types(define_case);
|
14 |
+
default: assert(0);
|
15 |
+
}
|
16 |
+
return 0;
|
17 |
+
}
|
18 |
+
|
19 |
+
long MemTracker::compute_mem_usage() {
|
20 |
+
long total_mem = 0;
|
21 |
+
forvec(_, MemRecord &, r, records) {
|
22 |
+
if(r.type != T_RAWNUMBER) r.mem = compute_mem_usage(r);
|
23 |
+
total_mem += r.mem;
|
24 |
+
}
|
25 |
+
return total_mem;
|
26 |
+
}
|
27 |
+
|
28 |
+
static bool record_less_than(const MemRecord &r1, const MemRecord &r2) {
|
29 |
+
return r1.mem > r2.mem;
|
30 |
+
}
|
31 |
+
|
32 |
+
void MemTracker::report_mem_usage() {
|
33 |
+
track("report_mem_usage()", "", true);
|
34 |
+
|
35 |
+
long total_mem = compute_mem_usage();
|
36 |
+
|
37 |
+
sort(records.begin(), records.end(), record_less_than);
|
38 |
+
|
39 |
+
forvec(_, const MemRecord &, r, records) {
|
40 |
+
logs(type_names[r.type] << ' ' << r.name << ": " <<
|
41 |
+
Mem(r.mem) << " (" << (double)r.mem/total_mem << ')');
|
42 |
+
}
|
43 |
+
logs("Total: " << Mem(total_mem));
|
44 |
+
}
|
45 |
+
|
46 |
+
////////////////////////////////////////////////////////////
|
47 |
+
|
48 |
+
MemTracker mem_tracker;
|
49 |
+
|
50 |
+
const char *MemTracker::type_names[] = {
|
51 |
+
"?",
|
52 |
+
list_types(define_str)
|
53 |
+
};
|
basic/mem-tracker.h
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __MEM_TRACKER_H__
|
2 |
+
#define __MEM_TRACKER_H__
|
3 |
+
|
4 |
+
#include "std.h"
|
5 |
+
#include "stl-basic.h"
|
6 |
+
#include "union-set.h"
|
7 |
+
#include "strdb.h"
|
8 |
+
|
9 |
+
// Currently, memory tracking is not accurate.
|
10 |
+
// Always underestimates.
|
11 |
+
|
12 |
+
// Call this function.  Don't use anything else.
// Registers variable x with the global tracker under its own source name.
#define track_mem(x) mem_tracker.add(__STRING(x), x)

// X-macro listing every trackable type: applies f to each type name.
#define list_types(f) \
  f(IntVec) \
  f(IntMat) \
  f(IntIntMap) \
  f(IntDoubleMap) \
  f(IntIntPairMap) \
  f(IntPairDoubleMap) \
  f(IntSet) \
  f(DoubleVec) \
  f(DoubleVecVec) \
  f(StrVec) \
  f(StrIntMap) \
  f(UnionSet) \
  f(StrDB)

// Expanders for list_types.
// prefix_t: the enum constant for a type.
#define prefix_t(type) T_##type,
// define_str: the type's name as a string literal.
#define define_str(type) __STRING(type),
// define_add: a MemTracker::add() overload that records a pointer to the data.
#define define_add(type) \
  void add(const char *name, const type &data) { \
    records.push_back(MemRecord(name, T_##type, &data)); \
  }
// define_case: a switch case that measures the recorded object.
#define define_case(type) \
  case T_##type: return mem_usage(*((const type *)r.data));
|
38 |
+
|
39 |
+
enum MemType { T_RAWNUMBER, list_types(prefix_t) };
|
40 |
+
|
41 |
+
struct MemRecord {
|
42 |
+
MemRecord(const char *name, long mem) :
|
43 |
+
name(name), type(T_RAWNUMBER), data(NULL), mem(mem) { }
|
44 |
+
MemRecord(const char *name, MemType type, const void *data) :
|
45 |
+
name(name), type(type), data(data), mem(0) { }
|
46 |
+
string name;
|
47 |
+
MemType type;
|
48 |
+
const void *data;
|
49 |
+
long mem;
|
50 |
+
};
|
51 |
+
|
52 |
+
// Track amount of memory used.
|
53 |
+
class MemTracker {
|
54 |
+
public:
|
55 |
+
static const char *type_names[];
|
56 |
+
|
57 |
+
list_types(define_add)
|
58 |
+
|
59 |
+
void add(const char *name, long mem) {
|
60 |
+
records.push_back(MemRecord(name, mem));
|
61 |
+
}
|
62 |
+
|
63 |
+
long compute_mem_usage(const MemRecord &r);
|
64 |
+
long compute_mem_usage();
|
65 |
+
void report_mem_usage();
|
66 |
+
|
67 |
+
private:
|
68 |
+
vector<MemRecord> records;
|
69 |
+
};
|
70 |
+
|
71 |
+
extern MemTracker mem_tracker;
|
72 |
+
|
73 |
+
////////////////////////////////////////////////////////////
|
74 |
+
// Various mem_usage() functions on various data types.
|
75 |
+
|
76 |
+
// Estimated bytes held by nested vectors: sizeof(T) for every innermost
// element plus sizeof(vector<T>) for every vector stored inside another.
// The outermost vector object itself is not counted.

// Four levels of nesting.
template<class T> long mem_usage(const std::vector< std::vector< std::vector< std::vector<T> > > > &mat) {
  long bytes = 0;
  for(size_t i = 0; i < mat.size(); ++i) {
    for(size_t j = 0; j < mat[i].size(); ++j) {
      for(size_t k = 0; k < mat[i][j].size(); ++k)
        bytes += mat[i][j][k].size() * sizeof(T);
      bytes += mat[i][j].size() * sizeof(std::vector<T>);
    }
    bytes += mat[i].size() * sizeof(std::vector<T>);
  }
  bytes += mat.size() * sizeof(std::vector<T>);
  return bytes;
}

// Three levels of nesting.
template<class T> long mem_usage(const std::vector< std::vector< std::vector<T> > > &mat) {
  long bytes = 0;
  for(size_t i = 0; i < mat.size(); ++i) {
    for(size_t j = 0; j < mat[i].size(); ++j)
      bytes += mat[i][j].size() * sizeof(T);
    bytes += mat[i].size() * sizeof(std::vector<T>);
  }
  bytes += mat.size() * sizeof(std::vector<T>);
  return bytes;
}

// Two levels of nesting (matrix).
template<class T> long mem_usage(const std::vector< std::vector<T> > &mat) {
  long bytes = 0;
  for(size_t i = 0; i < mat.size(); ++i)
    bytes += mat[i].size() * sizeof(T);
  bytes += mat.size() * sizeof(std::vector<T>);
  return bytes;
}

// Flat vector.
template<class T> long mem_usage(const std::vector<T> &vec) {
  return vec.size() * sizeof(T);
}
|
112 |
+
|
113 |
+
// Estimated bytes held by a hash set: one pointer-sized slot per bucket
// plus, per element, the element and one link pointer.
// The bucket slot was previously hard-coded as 4 bytes, which disagreed
// with the sizeof(void *) used for the per-element link.
template<class T> long mem_usage(const std::unordered_set<T> &table) { // hash_set
  return (long)table.bucket_count()*sizeof(void *) + table.size()*(sizeof(T)+sizeof(void *));
}

// Estimated bytes held by a hash map: one pointer-sized slot per bucket
// plus, per entry, the key, the value and one link pointer.
template<class Tx, class Ty, class Hf, class Eq> long mem_usage(const std::unordered_map<Tx, Ty, Hf, Eq> &table) { // hash_map
  return (long)table.bucket_count()*sizeof(void *) + table.size()*(sizeof(Tx)+sizeof(Ty)+sizeof(void *));
}
|
120 |
+
|
121 |
+
inline long mem_usage(const UnionSet &u) { // UnionSet
|
122 |
+
return mem_usage(u.parent);
|
123 |
+
}
|
124 |
+
|
125 |
+
inline long mem_usage(const StrDB &db) { // StrDB
|
126 |
+
long mem = mem_usage(db.s2i) + mem_usage(db.i2s);
|
127 |
+
foridx(i, len(db))
|
128 |
+
mem += (strlen(db[i])+1) * sizeof(char);
|
129 |
+
return mem;
|
130 |
+
}
|
131 |
+
|
132 |
+
#endif
|
basic/mem.h
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __MEM_H__
|
2 |
+
#define __MEM_H__
|
3 |
+
|
4 |
+
// Takes an amount of memory in bytes and formats it nicely
|
5 |
+
// Wrapper around a byte count so that it can be streamed in
// human-readable form.
struct Mem {
  Mem(long mem) : mem(mem) { }
  long mem;  // Size in bytes.
};

// Print a byte count: below 1024 as plain bytes, below 1024*1024 as whole
// kilobytes with a 'K' suffix, otherwise as whole megabytes with an 'M'
// suffix.  Fractions are truncated.
inline std::ostream &operator<<(std::ostream &out, const Mem &m) {
  const unsigned long kilo = 1024;
  const unsigned long mega = 1024*1024;
  const unsigned long bytes = m.mem;

  if(bytes >= mega)      out << bytes/mega << 'M';
  else if(bytes >= kilo) out << bytes/kilo << 'K';
  else                   out << bytes;
  return out;
}
|
13 |
+
|
14 |
+
#endif
|
basic/multi-ostream.cc
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "multi-ostream.h"
|
2 |
+
|
3 |
+
/*
|
4 |
+
* Create a multi_ostream, and you can add many files or any ostream objects
|
5 |
+
* to it. The output sent to the multi_ostream will be redirected to the many
|
6 |
+
* destinations.
|
7 |
+
* Useful for logging to a file and stdout.
|
8 |
+
*/
|
9 |
+
|
10 |
+
#include <iostream>
|
11 |
+
#include <fstream>
|
12 |
+
#include <vector>
|
13 |
+
|
14 |
+
using namespace std;
|
15 |
+
|
16 |
+
multi_buf::~multi_buf() {
|
17 |
+
flush();
|
18 |
+
for(size_t i = 0; i < infos.size(); i++)
|
19 |
+
infos[i].destroy();
|
20 |
+
}
|
21 |
+
|
22 |
+
void multi_buf::add(ostream *out, bool own, bool hard) {
|
23 |
+
infos.push_back(ostream_info(out, own, hard));
|
24 |
+
}
|
25 |
+
|
26 |
+
void multi_buf::flush() {
|
27 |
+
for(size_t i = 0; i < infos.size(); i++) {
|
28 |
+
ostream_info &info = infos[i];
|
29 |
+
info.out->write(buf, buf_i);
|
30 |
+
info.out->flush();
|
31 |
+
}
|
32 |
+
buf_i = 0;
|
33 |
+
}
|
34 |
+
|
35 |
+
void multi_buf::hard_flush() {
|
36 |
+
for(size_t i = 0; i < infos.size(); i++) {
|
37 |
+
ostream_info &info = infos[i];
|
38 |
+
info.out->write(buf, buf_i);
|
39 |
+
if(info.hard)
|
40 |
+
((hard_ofstream *)info.out)->hard_flush();
|
41 |
+
else
|
42 |
+
info.out->flush();
|
43 |
+
}
|
44 |
+
buf_i = 0;
|
45 |
+
}
|
46 |
+
|
47 |
+
int multi_buf::overflow(int ch) {
|
48 |
+
buf[buf_i++] = ch;
|
49 |
+
if(buf_i == sizeof(buf) || ch == '\n') flush();
|
50 |
+
return ch;
|
51 |
+
}
|
52 |
+
|
53 |
+
// Flush buffered text to all destinations; returns this stream.
ostream &multi_ostream::flush() {
  sbuf.flush();
  ostream &self = *this;
  return self;
}

// Same, but hard-flushing destinations that support it.
ostream &multi_ostream::hard_flush() {
  sbuf.hard_flush();
  ostream &self = *this;
  return self;
}
|
basic/multi-ostream.h
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __MULTI_OSTREAM_H__
|
2 |
+
#define __MULTI_OSTREAM_H__
|
3 |
+
|
4 |
+
/*
|
5 |
+
* Create a multi_ostream, and you can add many files or any ostream objects
|
6 |
+
* to it. The output sent to the multi_ostream will be redirected to the many
|
7 |
+
* destinations.
|
8 |
+
* Useful for logging to a file and stdout.
|
9 |
+
*/
|
10 |
+
|
11 |
+
#include <iostream>
|
12 |
+
#include <fstream>
|
13 |
+
#include <vector>
|
14 |
+
|
15 |
+
#include "hard-ofstream.h"
|
16 |
+
|
17 |
+
using namespace std;
|
18 |
+
|
19 |
+
struct ostream_info {
|
20 |
+
ostream_info(ostream *out, bool own, bool hard) : out(out), own(own), hard(hard) { }
|
21 |
+
ostream *out;
|
22 |
+
bool own; // Whether we own the ostream and should destroy it at the end.
|
23 |
+
bool hard; // Whether this is a hard_ofstream.
|
24 |
+
|
25 |
+
void destroy() { if(own) delete out; }
|
26 |
+
};
|
27 |
+
|
28 |
+
class multi_buf : public streambuf {
|
29 |
+
public:
|
30 |
+
multi_buf() : buf_i(0) { }
|
31 |
+
~multi_buf();
|
32 |
+
|
33 |
+
void flush();
|
34 |
+
void hard_flush();
|
35 |
+
|
36 |
+
void add(ostream *out, bool own, bool hard);
|
37 |
+
void remove_last() { flush(); infos.back().destroy(); infos.pop_back(); }
|
38 |
+
|
39 |
+
protected:
|
40 |
+
virtual int overflow(int ch);
|
41 |
+
|
42 |
+
private:
|
43 |
+
vector<ostream_info> infos;
|
44 |
+
char buf[16384];
|
45 |
+
int buf_i;
|
46 |
+
};
|
47 |
+
|
48 |
+
// Output stream that duplicates everything written to it onto any number
// of files or other ostreams.
class multi_ostream : public basic_ostream<char, char_traits<char> > {
public:
  multi_ostream() : basic_ostream<char, char_traits<char> >(&sbuf) { }

  virtual ostream &flush();
  virtual ostream &hard_flush();

  // Add a file destination, owned by this stream.  With hard = true the
  // file is opened as a hard_ofstream.
  void add(const char *file, bool hard = false) {
    ostream *out;
    if(hard)
      out = new hard_ofstream(file);
    else
      out = new ofstream(file);
    sbuf.add(out, true, hard);
  }

  // Add an existing stream; the caller keeps ownership.
  void add(ostream *out) { sbuf.add(out, false, false); }

  // Drop the most recently added destination.
  void remove_last() { sbuf.remove_last(); }

private:
  multi_buf sbuf;  // The buffer that fans output out to the destinations.
};
|
66 |
+
|
67 |
+
#endif
|
basic/opt.cc
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "opt.h"
|
2 |
+
#include "std.h"
|
3 |
+
#include "logging.h"
|
4 |
+
#include <getopt.h>
|
5 |
+
|
6 |
+
////////////////////////////////////////////////////////////////////////
|
7 |
+
// command-line arguments
|
8 |
+
|
9 |
+
void GetOpt::AddOpt(const string &name, bool has_arg) {
|
10 |
+
opts.push_back(pair<string, bool>(name, has_arg));
|
11 |
+
}
|
12 |
+
|
13 |
+
void GetOpt::Parse(int argc, char *argv[]) {
|
14 |
+
option *opt_list = new option[opts.size()+1];
|
15 |
+
for(int i = 0; i <= (int)opts.size(); i++) {
|
16 |
+
option *o = &opt_list[i];
|
17 |
+
if(i < (int)opts.size()) {
|
18 |
+
o->name = opts[i].first.c_str();
|
19 |
+
o->has_arg = opts[i].second;
|
20 |
+
//printf("N %s\n", o->name);
|
21 |
+
}
|
22 |
+
else {
|
23 |
+
o->name = NULL;
|
24 |
+
o->has_arg = 0;
|
25 |
+
}
|
26 |
+
o->flag = NULL;
|
27 |
+
o->val = 0;
|
28 |
+
}
|
29 |
+
|
30 |
+
int i;
|
31 |
+
|
32 |
+
values.clear();
|
33 |
+
values.resize(opts.size());
|
34 |
+
while(true) {
|
35 |
+
int status = getopt_long(argc, argv, "", opt_list, &i);
|
36 |
+
if(status == -1) break;
|
37 |
+
assert(status == 0);
|
38 |
+
//debug("%d %s -> %s\n", i, opt_list[i].name, optarg);
|
39 |
+
// put a 1 to signify that the argument exists
|
40 |
+
values[i] = optarg ? optarg : "1";
|
41 |
+
}
|
42 |
+
|
43 |
+
delete [] opt_list;
|
44 |
+
}
|
45 |
+
|
46 |
+
// Index of the first registered option called `name`, or -1 if there is
// none.
int GetOpt::Lookup(const string &name) const {
  int found = -1;
  for(int k = 0; found == -1 && k < (int)opts.size(); k++) {
    if(opts[k].first == name) found = k;
  }
  return found;
}
|
52 |
+
|
53 |
+
string GetOpt::Get(const string &name, const string &default_value) const {
|
54 |
+
int i = Lookup(name);
|
55 |
+
return i != -1 && !values[i].empty() ? values[i] : default_value;
|
56 |
+
}
|
57 |
+
|
58 |
+
string GetOpt::Get(const string &name) const {
|
59 |
+
string x = Get(name, "");
|
60 |
+
if(x.empty()) {
|
61 |
+
fprintf(stderr, "Missing required parameter `%s'.\n", name.c_str());
|
62 |
+
exit(1);
|
63 |
+
}
|
64 |
+
return x;
|
65 |
+
}
|
66 |
+
|
67 |
+
bool GetOpt::Exists(const string &name) const {
|
68 |
+
return !Get(name, "").empty();
|
69 |
+
}
|
70 |
+
|
71 |
+
int GetOpt::GetInt(const string &name) const {
|
72 |
+
int x;
|
73 |
+
int r = sscanf(Get(name).c_str(), "%d", &x);
|
74 |
+
assert(r == 1);
|
75 |
+
return x;
|
76 |
+
}
|
77 |
+
|
78 |
+
int GetOpt::GetInt(const string &name, int default_value) const {
|
79 |
+
return Exists(name) ? GetInt(name) : default_value;
|
80 |
+
}
|
81 |
+
|
82 |
+
double GetOpt::GetDouble(const string &name) const {
|
83 |
+
double x;
|
84 |
+
int r = sscanf(Get(name).c_str(), "%lf", &x);
|
85 |
+
assert(r == 1);
|
86 |
+
return x;
|
87 |
+
}
|
88 |
+
|
89 |
+
double GetOpt::GetDouble(const string &name, double default_value) const {
|
90 |
+
return Exists(name) ? GetDouble(name) : default_value;
|
91 |
+
}
|
92 |
+
|
93 |
+
////////////////////////////////////////////////////////////
|
94 |
+
|
95 |
+
void process_opt(int argc, char *argv[]) {
|
96 |
+
GetOpt opt;
|
97 |
+
|
98 |
+
// set up GetOpt to parse
|
99 |
+
for(int i = 0; i < (int)bool_opts.size(); i++) {
|
100 |
+
opt.AddOpt(bool_opts[i].name, false);
|
101 |
+
opt.AddOpt("no" + bool_opts[i].name, false);
|
102 |
+
}
|
103 |
+
for(int i = 0; i < (int)int_opts.size(); i++)
|
104 |
+
opt.AddOpt(int_opts[i].name, true);
|
105 |
+
for(int i = 0; i < (int)double_opts.size(); i++)
|
106 |
+
opt.AddOpt(double_opts[i].name, true);
|
107 |
+
for(int i = 0; i < (int)string_opts.size(); i++)
|
108 |
+
opt.AddOpt(string_opts[i].name, true);
|
109 |
+
opt.AddOpt("help", false);
|
110 |
+
|
111 |
+
// parse
|
112 |
+
opt.Parse(argc, argv);
|
113 |
+
|
114 |
+
// print help if called for
|
115 |
+
if(opt.Exists("help") || !opt.Exists("text")) {
|
116 |
+
printf("usage: %s\n", argv[0]);
|
117 |
+
for(int i = 0; i < (int)bool_opts.size(); i++) {
|
118 |
+
const OptInfo<bool> &o = bool_opts[i];
|
119 |
+
printf(" %c%-20s: %s", " *"[o.required], o.name.c_str(), o.msg.c_str());
|
120 |
+
if(!o.required) printf(" [%s]", *(o.var) ? "true" : "false");
|
121 |
+
printf("\n");
|
122 |
+
}
|
123 |
+
for(int i = 0; i < (int)int_opts.size(); i++) {
|
124 |
+
const OptInfo<int> &o = int_opts[i];
|
125 |
+
printf(" %c%-13s <int> : %s", " *"[o.required], o.name.c_str(), o.msg.c_str());
|
126 |
+
if(!o.required) printf(" [%d]", *(o.var));
|
127 |
+
printf("\n");
|
128 |
+
}
|
129 |
+
for(int i = 0; i < (int)double_opts.size(); i++) {
|
130 |
+
const OptInfo<double> &o = double_opts[i];
|
131 |
+
printf(" %c%-13s <dbl> : %s", " *"[o.required], o.name.c_str(), o.msg.c_str());
|
132 |
+
if(!o.required) printf(" [%f]", *(o.var));
|
133 |
+
printf("\n");
|
134 |
+
}
|
135 |
+
for(int i = 0; i < (int)string_opts.size(); i++) {
|
136 |
+
const OptInfo<string> &o = string_opts[i];
|
137 |
+
printf(" %c%-13s <str> : %s", " *"[o.required], o.name.c_str(), o.msg.c_str());
|
138 |
+
if(!o.required) printf(" [%s]", (o.var)->c_str());
|
139 |
+
printf("\n");
|
140 |
+
}
|
141 |
+
exit(1);
|
142 |
+
}
|
143 |
+
|
144 |
+
// retrieve data; store the variables
|
145 |
+
for(int i = 0; i < (int)bool_opts.size(); i++) {
|
146 |
+
const OptInfo<bool> &o = bool_opts[i];
|
147 |
+
bool yes = opt.Exists(o.name);
|
148 |
+
bool no = opt.Exists("no" + o.name);
|
149 |
+
assert(!o.required || (yes || no));
|
150 |
+
assert(!yes || !no);
|
151 |
+
if(yes) *(o.var) = true;
|
152 |
+
if(no) *(o.var) = false;
|
153 |
+
}
|
154 |
+
for(int i = 0; i < (int)int_opts.size(); i++) {
|
155 |
+
const OptInfo<int> &o = int_opts[i];
|
156 |
+
*(o.var) = o.required ? opt.GetInt(o.name) : opt.GetInt(o.name, *(o.var));
|
157 |
+
}
|
158 |
+
for(int i = 0; i < (int)double_opts.size(); i++) {
|
159 |
+
const OptInfo<double> &o = double_opts[i];
|
160 |
+
*(o.var) = o.required ? opt.GetDouble(o.name) : opt.GetDouble(o.name, *(o.var));
|
161 |
+
}
|
162 |
+
for(int i = 0; i < (int)string_opts.size(); i++) {
|
163 |
+
const OptInfo<string> &o = string_opts[i];
|
164 |
+
*(o.var) = o.required ? opt.Get(o.name) : opt.Get(o.name, *(o.var));
|
165 |
+
}
|
166 |
+
}
|
167 |
+
|
168 |
+
void init_opt(int argc, char *argv[]) {
|
169 |
+
process_opt(argc, argv);
|
170 |
+
srand(rand_seed);
|
171 |
+
}
|
172 |
+
|
173 |
+
void print_opts() {
|
174 |
+
track("print_opts()", "", true);
|
175 |
+
forvec(_, const OptInfo<bool> &, o, bool_opts)
|
176 |
+
logs(o.name << " = " << (*o.var ? "true" : "false"));
|
177 |
+
forvec(_, const OptInfo<int> &, o, int_opts)
|
178 |
+
logs(o.name << " = " << *o.var);
|
179 |
+
forvec(_, const OptInfo<double> &, o, double_opts)
|
180 |
+
logs(o.name << " = " << *o.var);
|
181 |
+
forvec(_, const OptInfo<string> &, o, string_opts)
|
182 |
+
logs(o.name << " = " << *o.var);
|
183 |
+
}
|
184 |
+
|
185 |
+
////////////////////////////////////////////////////////////
|
186 |
+
// Pre defined options.
|
187 |
+
|
188 |
+
// allow user to specify a comment always, so some arbitrary description
|
189 |
+
// of this program execution can be embedded in the command-line
|
basic/opt.h
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __OPT_H__
|
2 |
+
#define __OPT_H__
|
3 |
+
|
4 |
+
#include <vector>
|
5 |
+
#include <string>
|
6 |
+
#include <stdio.h>
|
7 |
+
|
8 |
+
using namespace std;
|
9 |
+
|
10 |
+
// First thing to call in main().
|
11 |
+
void init_opt(int argc, char *argv[]);
|
12 |
+
|
13 |
+
////////////////////////////////////////////////////////////////////////
|
14 |
+
// command-line arguments
|
15 |
+
|
16 |
+
class GetOpt {
|
17 |
+
public:
|
18 |
+
GetOpt() { }
|
19 |
+
|
20 |
+
void AddOpt(const string &name, bool has_arg);
|
21 |
+
void Parse(int argc, char *argv[]);
|
22 |
+
int Lookup(const string &name) const;
|
23 |
+
|
24 |
+
bool Exists(const string &name) const;
|
25 |
+
string Get(const string &name, const string &default_value) const;
|
26 |
+
string Get(const string &name) const;
|
27 |
+
int GetInt(const string &name) const;
|
28 |
+
int GetInt(const string &name, int default_value) const;
|
29 |
+
double GetDouble(const string &name) const;
|
30 |
+
double GetDouble(const string &name, double default_value) const;
|
31 |
+
|
32 |
+
private:
|
33 |
+
vector< pair<string, bool> > opts;
|
34 |
+
vector<string> values;
|
35 |
+
};
|
36 |
+
|
37 |
+
template<class T> struct OptInfo {
|
38 |
+
OptInfo(const string &name, T *var, const string &msg, bool required)
|
39 |
+
: name(name), var(var), msg(msg), required(required) { }
|
40 |
+
|
41 |
+
string name;
|
42 |
+
T *var; // location of the variable that stores this value
|
43 |
+
string msg;
|
44 |
+
bool required;
|
45 |
+
};
|
46 |
+
|
47 |
+
extern vector< OptInfo<bool> > bool_opts;
|
48 |
+
extern vector< OptInfo<int> > int_opts;
|
49 |
+
extern vector< OptInfo<double> > double_opts;
|
50 |
+
extern vector< OptInfo<string> > string_opts;
|
51 |
+
|
52 |
+
////////////////////////////////////////////////////////////
|
53 |
+
|
54 |
+
// two versions: in one, option is required
|
55 |
+
#define opt_define_bool_req(var, name, msg) \
|
56 |
+
bool var = opt_define_bool_wrap(name, &var, false, msg, true)
|
57 |
+
#define opt_define_bool(var, name, val, msg) \
|
58 |
+
bool var = opt_define_bool_wrap(name, &var, val, msg, false)
|
59 |
+
#define opt_define_int_req(var, name, msg) \
|
60 |
+
int var = opt_define_int_wrap(name, &var, 0, msg, true)
|
61 |
+
#define opt_define_int(var, name, val, msg) \
|
62 |
+
int var = opt_define_int_wrap(name, &var, val, msg, false)
|
63 |
+
#define opt_define_double_req(var, name, msg) \
|
64 |
+
double var = opt_define_double_wrap(name, &var, 0.0, msg, true)
|
65 |
+
#define opt_define_double(var, name, val, msg) \
|
66 |
+
double var = opt_define_double_wrap(name, &var, val, msg, false)
|
67 |
+
#define opt_define_string_req(var, name, msg) \
|
68 |
+
string var = opt_define_string_wrap(name, &var, "", msg, true)
|
69 |
+
#define opt_define_string(var, name, val, msg) \
|
70 |
+
string var = opt_define_string_wrap(name, &var, val, msg, false)
|
71 |
+
|
72 |
+
inline bool opt_define_bool_wrap(const string &name, bool *var, bool val, const string &msg, bool required) {
|
73 |
+
bool_opts.push_back(OptInfo<bool>(name, var, msg, required));
|
74 |
+
return val;
|
75 |
+
}
|
76 |
+
|
77 |
+
inline int opt_define_int_wrap(const string &name, int *var, int val, const string &msg, bool required) {
|
78 |
+
//printf("HELLO %s\n", name.c_str());
|
79 |
+
int_opts.push_back(OptInfo<int>(name, var, msg, required));
|
80 |
+
//printf("N %d\n", (int)int_opts.size());
|
81 |
+
return val;
|
82 |
+
}
|
83 |
+
inline double opt_define_double_wrap(const string &name, double *var, double val, const string &msg, bool required) {
|
84 |
+
double_opts.push_back(OptInfo<double>(name, var, msg, required));
|
85 |
+
return val;
|
86 |
+
}
|
87 |
+
inline string opt_define_string_wrap(const string &name, string *var, const string &val, const string &msg, bool required) {
|
88 |
+
string_opts.push_back(OptInfo<string>(name, var, msg, required));
|
89 |
+
return val;
|
90 |
+
}
|
91 |
+
|
92 |
+
////////////////////////////////////////////////////////////
|
93 |
+
|
94 |
+
void print_opts();
|
95 |
+
|
96 |
+
extern int rand_seed;
|
97 |
+
extern string comment;
|
98 |
+
extern int initC;
|
99 |
+
|
100 |
+
#endif
|
basic/pipe.h
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
Execute another application, piping input to and from its stdin and stdout.
|
3 |
+
*/
|
4 |
+
|
5 |
+
#ifndef __PIPE_H__
|
6 |
+
#define __PIPE_H__
|
7 |
+
|
8 |
+
typedef pair<FILE *, FILE *> FILEPair;

// Run cmd (a NULL-terminated argv; cmd[0] is resolved by execvp) as a child
// process whose stdin and stdout are connected to two pipes.
// Return input and output file pointers: `first` reads what the child writes
// to its stdout, `second` writes to the child's stdin.
// User is responsible for closing them.
// May have to close out before reading from in.
// Declared inline because this definition lives in a header.
inline FILEPair create_pipe(char *const cmd[]) {
  int p2c_fds[2], c2p_fds[2];
  int rc;

  // Calls with side effects are kept outside assert(): under NDEBUG the
  // whole assert expression is discarded, and the call with it.
  rc = pipe(p2c_fds);
  assert(rc == 0);
  rc = pipe(c2p_fds);
  assert(rc == 0);

  int pid = fork();
  assert(pid != -1);
  if(pid != 0) { // parent
    close(p2c_fds[0]);
    close(c2p_fds[1]);

    FILE *in = fdopen(c2p_fds[0], "r");
    FILE *out = fdopen(p2c_fds[1], "w");

    assert(in && out);

    (void)rc; // silence "set but not used" when asserts are compiled out
    return FILEPair(in, out);
  }
  else { // child
    close(p2c_fds[1]);
    close(c2p_fds[0]);

    rc = dup2(p2c_fds[0], fileno(stdin));
    assert(rc != -1);
    rc = dup2(c2p_fds[1], fileno(stdout));
    assert(rc != -1);
    (void)rc;
    execvp(cmd[0], cmd);

    // Execution should not reach here (execvp only returns on failure).
    assert(0);
    // With asserts disabled, make sure the child never returns into the
    // caller's code as a second copy of the parent.
    _exit(127);
    return FILEPair(NULL, NULL);
  }
}
|
45 |
+
|
46 |
+
#endif
|
basic/prob-utils.cc
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "prob-utils.h"
|
2 |
+
|
3 |
+
double rand_gaussian(double mean, double var) {
|
4 |
+
// Use the Box-Muller Transformation
|
5 |
+
// if x_1 and x_2 are independent uniform [0, 1],
|
6 |
+
// then sqrt(-2 ln x_1) * cos(2*pi*x_2) is Gaussian with mean 0 and variance 1
|
7 |
+
double x1 = rand_double(), x2 = rand_double();
|
8 |
+
double z = sqrt(-2*log(x1))*cos(2*M_PI*x2);
|
9 |
+
return z * sqrt(var) + mean;
|
10 |
+
}
|
11 |
+
|
12 |
+
// The probability of heads is p.
|
13 |
+
// Throw n coin tosses.
|
14 |
+
// Return number of heads.
|
15 |
+
int rand_binomial(int n, double p) {
|
16 |
+
int k = 0;
|
17 |
+
while(n--) k += rand_double() < p;
|
18 |
+
return k;
|
19 |
+
}
|
20 |
+
|
21 |
+
inline double factorial(int n) {
|
22 |
+
double ans = 1;
|
23 |
+
while(n > 1) ans *= n--;
|
24 |
+
return ans;
|
25 |
+
}
|
26 |
+
|
27 |
+
// Binomial coefficient C(n, k), returned as a double.
// Returns 0 when k is outside [0, n] (there are no such subsets).
inline double choose(int n, int k) {
  if(k < 0 || k > n) return 0;
  if(n-k < k) k = n-k; // C(n, k) == C(n, n-k); use the shorter product
  double ans = 1;
  // Multiply and divide in lockstep: after step i, ans == C(n, i+1).
  // This keeps intermediates no larger than the result, instead of forming
  // a huge numerator and a huge k! that both overflow to infinity.
  for(int i = 0; i < k; i++) ans = ans * (n-i) / (i+1);
  return ans;
}
|
34 |
+
|
35 |
+
double binomial_prob(int n, int k, double p) {
|
36 |
+
return choose(n, k) * pow(p, k) * pow(1-p, n-k);
|
37 |
+
}
|
38 |
+
|
39 |
+
int rand_index(const fvector &probs) {
|
40 |
+
double v = rand_double();
|
41 |
+
double sum = 0;
|
42 |
+
foridx(i, len(probs)) {
|
43 |
+
sum += probs[i];
|
44 |
+
if(v < sum) return i;
|
45 |
+
}
|
46 |
+
assert(0);
|
47 |
+
}
|
48 |
+
|
49 |
+
void norm_distrib(fvector &vec) {
|
50 |
+
double sum = 0;
|
51 |
+
foridx(i, len(vec)) sum += vec[i];
|
52 |
+
foridx(i, len(vec)) vec[i] /= sum;
|
53 |
+
}
|
54 |
+
|
55 |
+
void norm_distrib(fmatrix &mat, int c) {
|
56 |
+
double sum = 0;
|
57 |
+
foridx(r, len(mat)) sum += mat[r][c];
|
58 |
+
foridx(r, len(mat)) mat[r][c] /= sum;
|
59 |
+
}
|
60 |
+
|
61 |
+
void rand_distrib(fvector &probs, int n) {
|
62 |
+
probs.resize(n);
|
63 |
+
foridx(i, n) probs[i] = rand();
|
64 |
+
norm_distrib(probs);
|
65 |
+
}
|
66 |
+
|
67 |
+
IntVec rand_permutation(int n) {
|
68 |
+
IntVec perm(n);
|
69 |
+
foridx(i, n) perm[i] = i;
|
70 |
+
foridx(i, n) {
|
71 |
+
int j = mrand(i, n);
|
72 |
+
int t = perm[i]; perm[i] = perm[j]; perm[j] = t;
|
73 |
+
}
|
74 |
+
return perm;
|
75 |
+
}
|
basic/prob-utils.h
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __PROB_UTILS__
|
2 |
+
#define __PROB_UTILS__
|
3 |
+
|
4 |
+
#include "stl-basic.h"
|
5 |
+
|
6 |
+
int rand_binomial(int n, double p);
|
7 |
+
int rand_index(const fvector &probs);
|
8 |
+
double rand_gaussian(double mean, double var);
|
9 |
+
|
10 |
+
inline double factorial(int n);
|
11 |
+
inline double choose(int n, int k);
|
12 |
+
double binomial_prob(int n, int k, double p);
|
13 |
+
|
14 |
+
void norm_distrib(fvector &vec);
|
15 |
+
void norm_distrib(fmatrix &mat, int c);
|
16 |
+
void rand_distrib(fvector &probs, int n);
|
17 |
+
IntVec rand_permutation(int n);
|
18 |
+
|
19 |
+
#endif
|
basic/stats.cc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
#include "stats.h"
|
basic/stats.h
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __STATS_H__
|
2 |
+
#define __STATS_H__
|
3 |
+
|
4 |
+
#include "std.h"
|
5 |
+
#include "stl-basic.h"
|
6 |
+
#define DBL_MAX 1e300
|
7 |
+
#define DBL_MIN (-1e300)
|
8 |
+
|
9 |
+
struct StatFig {
|
10 |
+
StatFig() { clear(); }
|
11 |
+
StatFig(double sum, int n) : sum(sum), n(n) { }
|
12 |
+
virtual ~StatFig() { }
|
13 |
+
|
14 |
+
static double F1(const StatFig &fig1, const StatFig &fig2) {
|
15 |
+
if(fig1.n == 0 || fig2.n == 0) return 0;
|
16 |
+
return 2*fig1.val()*fig2.val() / (fig1.val()+fig2.val());
|
17 |
+
}
|
18 |
+
|
19 |
+
void add() { add(1); }
|
20 |
+
virtual void add(double v) { sum += v; n++; }
|
21 |
+
virtual void clear() { sum = n = 0; }
|
22 |
+
int size() const { return n; }
|
23 |
+
double val() const { return sum / n; }
|
24 |
+
double mean() const { return sum / n; }
|
25 |
+
double sum;
|
26 |
+
int n;
|
27 |
+
};
|
28 |
+
|
29 |
+
inline ostream &operator<<(ostream &out, const StatFig &fig) {
|
30 |
+
return out << fig.sum << '/' << fig.n << '=' << fig.val();
|
31 |
+
}
|
32 |
+
|
33 |
+
////////////////////////////////////////////////////////////
|
34 |
+
// Stores the min and the max
|
35 |
+
|
36 |
+
struct BigStatFig : public StatFig {
|
37 |
+
BigStatFig() { clear(); }
|
38 |
+
void add(double v) { if(v < min) min = v; if(v > max) max = v; StatFig::add(v); }
|
39 |
+
void clear() { min = DBL_MAX; max = DBL_MIN; StatFig::clear(); }
|
40 |
+
double min, max;
|
41 |
+
};
|
42 |
+
|
43 |
+
inline ostream &operator<<(ostream &out, const BigStatFig &fig) {
|
44 |
+
return out << fig.n << ':' << fig.min << "/<< " << fig.val() << " >>/" << fig.max;
|
45 |
+
}
|
46 |
+
|
47 |
+
////////////////////////////////////////////////////////////
|
48 |
+
// Stores the standard deviation (and all points)
|
49 |
+
|
50 |
+
struct FullStatFig : public BigStatFig {
|
51 |
+
FullStatFig() { clear(); }
|
52 |
+
virtual ~FullStatFig() { }
|
53 |
+
void add(double v) { data.push_back(v); BigStatFig::add(v); }
|
54 |
+
void clear() { data.clear(); BigStatFig::clear(); }
|
55 |
+
|
56 |
+
double variance() const {
|
57 |
+
double var = 0, mean = val();
|
58 |
+
forvec(_, double, v, data) var += sq(v-mean);
|
59 |
+
var /= n;
|
60 |
+
return var;
|
61 |
+
}
|
62 |
+
double stddev() const { return sqrt(variance()); }
|
63 |
+
|
64 |
+
DoubleVec data;
|
65 |
+
};
|
66 |
+
|
67 |
+
inline ostream &operator<<(ostream &out, const FullStatFig &fig) {
|
68 |
+
return out << (BigStatFig)fig << '~' << fig.stddev();
|
69 |
+
}
|
70 |
+
|
71 |
+
#endif
|
basic/std.cc
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include <sys/stat.h>
|
2 |
+
#include <dirent.h>
|
3 |
+
#include <unistd.h>
|
4 |
+
#include "std.h"
|
5 |
+
#include "str.h"
|
6 |
+
#include "timer.h"
|
7 |
+
|
8 |
+
// Return the current date/time.
|
9 |
+
string now() {
|
10 |
+
time_t t = time(NULL);
|
11 |
+
return substr(ctime(&t), 0, -1);
|
12 |
+
}
|
13 |
+
|
14 |
+
// Return this machine's host name, or "" if gethostname() fails.
string hostname() {
  char buf[1024];
  if(gethostname(buf, sizeof(buf)) != 0) return "";
  // gethostname() may leave the buffer unterminated when the name is
  // truncated, so terminate it before building the string.
  buf[sizeof(buf)-1] = '\0';
  return buf;
}
|
19 |
+
|
20 |
+
// Return the amount of memory (kB) used by this process
|
21 |
+
// Return the number on the "VmRSS" line of /proc/self/status.
// Returns 0 if the file cannot be opened or the VmRSS line is malformed,
// and -1 if no VmRSS line is found.
long mem_usage() {
  ifstream in("/proc/self/status");
  if(!in) return 0;
  char buf[1024];
  static const char *key = "VmRSS";

  while(in.getline(buf, sizeof(buf))) {
    if(strncmp(buf, key, strlen(key)) != 0) continue;
    char *s = strchr(buf, ':');
    if(!s) return 0;
    long x = 0;
    // Check the conversion so an unparsable line cannot yield garbage.
    if(sscanf(s+1, "%ld", &x) != 1) return 0;
    return x;
  }
  return -1;
}
|
37 |
+
|
38 |
+
// Return whether the file exists.
|
39 |
+
bool file_exists(const char *file) {
|
40 |
+
return access(file, F_OK) == 0;
|
41 |
+
}
|
42 |
+
|
43 |
+
// Create an empty file. Return success.
|
44 |
+
bool create_file(const char *file) {
|
45 |
+
ofstream out(file);
|
46 |
+
if(!out) return false;
|
47 |
+
out.close();
|
48 |
+
return true;
|
49 |
+
}
|
50 |
+
|
51 |
+
time_t file_modified_time(const char *file) {
|
52 |
+
struct stat stat_buf;
|
53 |
+
if(stat(file, &stat_buf) != 0)
|
54 |
+
return 0;
|
55 |
+
return stat_buf.st_mtime;
|
56 |
+
}
|
57 |
+
|
58 |
+
// Return the cpu speed in MHz.
|
59 |
+
// Return the cpu speed in MHz, read from the first "cpu MHz" line of
// /proc/cpuinfo and truncated to an int.
// Returns 0 if the file cannot be opened, no such line exists, or the line
// is malformed.
int cpu_speed_mhz() {
  ifstream in("/proc/cpuinfo");
  if(!in) return 0;
  char buf[1024];
  static const char *key = "cpu MHz";

  while(in.getline(buf, sizeof(buf))) {
    if(strncmp(buf, key, strlen(key)) != 0) continue;
    char *s = strchr(buf, ':');
    if(!s) return 0;
    double x = 0;
    // Check the conversion so an unparsable line cannot yield garbage.
    if(sscanf(s+1, "%lf", &x) != 1) return 0;
    return (int)x;
  }
  return 0;
}
|
75 |
+
|
76 |
+
// "file" -> "file"
|
77 |
+
// "dir/file" -> "file"
|
78 |
+
string strip_dir(string s) {
|
79 |
+
return substr(s, s.rfind('/')+1);
|
80 |
+
}
|
81 |
+
|
82 |
+
// "file" -> "."
|
83 |
+
// "dir/file" -> "dir"
|
84 |
+
string get_dir(string s) {
|
85 |
+
int i = s.rfind('/');
|
86 |
+
return i == -1 ? "." : substr(s, 0, s.rfind('/'));
|
87 |
+
}
|
88 |
+
|
89 |
+
// "base" -> "base"
|
90 |
+
// "base.ext" -> "base"
|
91 |
+
// Strip the extension (the last '.' and everything after it) from s.
// A '.' that appears before the last '/' belongs to a directory name, not
// to the file, so in that case s is returned unchanged.
string file_base(string s) {
  const size_t dot = s.rfind('.');
  if(dot == string::npos) return s;
  const size_t slash = s.rfind('/');
  if(slash != string::npos && dot < slash) return s; // e.g. "dir.d/file"
  return s.substr(0, dot);
}
|
95 |
+
|
96 |
+
// Append the non-directory entries of dirname to files.
// With fullpath, each entry is prefixed with "dirname/"; otherwise only the
// entry name is stored. Returns false (leaving files untouched) if the
// directory cannot be opened, true otherwise.
bool get_files_in_dir(string dirname, bool fullpath, vector<string> &files) {
  DIR *dir = opendir(dirname.c_str());
  if(!dir) return false;
  while(true) {
    dirent *ent = readdir(dir);
    if(!ent) break;
    // "." and ".." are never files, whatever d_type claims.
    if(strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;

    const string path = dirname + "/" + ent->d_name;
    bool is_dir = (ent->d_type == DT_DIR);
    // For some reason, sometimes files show up as d_type == DT_UNKNOWN, I
    // think due to AFS issues. Ask stat() in that case so that real
    // directories are still filtered out; if stat() fails, keep the entry.
    if(ent->d_type == DT_UNKNOWN) {
      struct stat stat_buf;
      is_dir = stat(path.c_str(), &stat_buf) == 0 && S_ISDIR(stat_buf.st_mode);
    }
    if(!is_dir) files.push_back(fullpath ? path : string(ent->d_name));
  }
  closedir(dir);
  return true;
}
|
basic/std.h
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __STD_H__
|
2 |
+
#define __STD_H__
|
3 |
+
|
4 |
+
#include <assert.h>
|
5 |
+
#include <math.h>
|
6 |
+
#include <stdlib.h>
|
7 |
+
//#include <values.h>
|
8 |
+
#include <limits.h>
|
9 |
+
#include <string.h>
|
10 |
+
#include <iostream>
|
11 |
+
#include <algorithm>
|
12 |
+
#include <iomanip>
|
13 |
+
#include <fstream>
|
14 |
+
#include <sstream>
|
15 |
+
#include <vector>
|
16 |
+
#include <string>
|
17 |
+
#include <queue>
|
18 |
+
#include <unordered_map>
|
19 |
+
#include <unordered_set>
|
20 |
+
|
21 |
+
using namespace std;
|
22 |
+
|
23 |
+
typedef long intIndex;
|
24 |
+
|
25 |
+
#define INT_SIZED(x) assert((x) < 2147483648L)
|
26 |
+
|
27 |
+
////////////////////////////////////////////////////////////
|
28 |
+
|
29 |
+
#define len(vec) (intIndex)(vec).size()
|
30 |
+
#define sq(x) ((x)*(x))
|
31 |
+
|
32 |
+
// For loop sugar. This is such a hack!
|
33 |
+
#define foridx(i, n) for(intIndex i = 0; i < n; i++)
|
34 |
+
#define forsidx(i, n) for(int i = 0; i < n; i++)
|
35 |
+
#define forvec(i, tx, x, vec) for(intIndex i = 0, _##i = 0; i < len(vec); i++) \
|
36 |
+
for(tx x = (vec)[i]; i == _##i; _##i++)
|
37 |
+
#define formap(tx, x, ty, y, t, map) forstl(t, _##x##y, map) _mapvars(tx, x, ty, y)
|
38 |
+
#define forcmap(tx, x, ty, y, t, map) forcstl(t, _##x##y, map) _mapvars(tx, x, ty, y)
|
39 |
+
#define forstl(t, x, container) for(t::iterator x = (container).begin(); x != (container).end(); x++)
|
40 |
+
#define forcstl(t, x, container) for(t::const_iterator x = (container).begin(); x != (container).end(); x++)
|
41 |
+
#define _mapvars(tx, x, ty, y) for(tx x = _##x##y->first, *_##x = &x; _##x; _##x = NULL) \
|
42 |
+
for(ty y = _##x##y->second, *_##y = &y; _##y; _##y = NULL)
|
43 |
+
|
44 |
+
////////////////////////////////////////////////////////////
|
45 |
+
// Generate random numbers.
|
46 |
+
|
47 |
+
inline intIndex mrand(intIndex a) { return rand() % a; }
|
48 |
+
inline intIndex mrand(intIndex a, intIndex b) { return rand() % (b-a) + a; }
|
49 |
+
inline double rand_double() {
|
50 |
+
static const intIndex BASE = 100000;
|
51 |
+
return (double)(rand()%BASE)/BASE;
|
52 |
+
}
|
53 |
+
|
54 |
+
////////////////////////////////////////////////////////////
|
55 |
+
// Floating point stuff.
|
56 |
+
|
57 |
+
const double TOL = 1e-10;
|
58 |
+
|
59 |
+
inline bool flt(double u, double v) { return u + TOL < v; }
|
60 |
+
inline bool fgt(double u, double v) { return u - TOL > v; }
|
61 |
+
|
62 |
+
// Comparing floating point numbers.
|
63 |
+
inline bool feq(double u, double v, double tol = TOL) { return fabs(u-v) < tol; }
|
64 |
+
|
65 |
+
template <class T> inline intIndex sign(T u) {
|
66 |
+
if(u < 0) return -1;
|
67 |
+
if(u > 0) return 1;
|
68 |
+
return 0;
|
69 |
+
}
|
70 |
+
|
71 |
+
#define assert_feq(u, v) do { _assert_feq(u, v, __FILE__, __LINE__); } while(0);
|
72 |
+
#define assert_feq2(u, v, tol) do { _assert_feq(u, v, tol, __FILE__, __LINE__); } while(0);
|
73 |
+
#define assert_fneq(u, v) do { _assert_fneq(u, v, __FILE__, __LINE__); } while(0);
|
74 |
+
inline void _assert_feq(double u, double v, const char *file, int line) {
|
75 |
+
if(!feq(u, v)) { printf("At %s:%d, %f != %f\n", file, line, u, v); assert(0); }
|
76 |
+
}
|
77 |
+
inline void _assert_feq(double u, double v, double tol, const char *file, int line) {
|
78 |
+
if(!feq(u, v, tol)) { printf("At %s:%d, %f != %f\n", file, line, u, v); assert(0); }
|
79 |
+
}
|
80 |
+
inline void _assert_fneq(double u, double v, const char *file, int line) {
|
81 |
+
if(feq(u, v)) { printf("At %s:%d, %f == %f\n", file, line, u, v); assert(0); }
|
82 |
+
}
|
83 |
+
#define assert_eq(u, v) do { _assert_eq(u, v, __STRING(u), __STRING(v), __FILE__, __LINE__); } while(0)
|
84 |
+
template<class T> inline void _assert_eq(const T &u, const T &v, const char *us, const char *vs, const char *file, int line) {
|
85 |
+
if(u != v) {
|
86 |
+
cout << "At " << file << ':' << line << ", " <<
|
87 |
+
us << '(' << u << ')' << " != " <<
|
88 |
+
vs << '(' << v << ')' << endl;
|
89 |
+
assert(0);
|
90 |
+
}
|
91 |
+
}
|
92 |
+
|
93 |
+
#define assert2(x, reason) \
|
94 |
+
do { \
|
95 |
+
if(!(x)) { \
|
96 |
+
cout << "\nFAILURE REASON: " << reason << endl; \
|
97 |
+
assert(x); \
|
98 |
+
} \
|
99 |
+
} while(0)
|
100 |
+
|
101 |
+
string now();
|
102 |
+
string hostname();
|
103 |
+
int cpu_speed_mhz();
|
104 |
+
long mem_usage(); // in kB
|
105 |
+
|
106 |
+
bool create_file(const char *file);
|
107 |
+
bool file_exists(const char *file);
|
108 |
+
time_t file_modified_time(const char *file);
|
109 |
+
|
110 |
+
string strip_dir(string s);
|
111 |
+
string get_dir(string s);
|
112 |
+
string file_base(string s);
|
113 |
+
bool get_files_in_dir(string dirname, bool fullpath, vector<string> &files);
|
114 |
+
|
115 |
+
#endif
|
basic/stl-basic.cc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
#include "stl-basic.h"
|
basic/stl-basic.h
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __STL_BASIC_H__
|
2 |
+
#define __STL_BASIC_H__
|
3 |
+
|
4 |
+
#include "std.h"
|
5 |
+
#include "city.h"
|
6 |
+
|
7 |
+
////////////////////////////////////////////////////////////
|
8 |
+
|
9 |
+
typedef double real;
|
10 |
+
//typedef float real;
|
11 |
+
|
12 |
+
typedef pair<int, int> IntPair;
|
13 |
+
typedef pair<int, real> IntDouble;
|
14 |
+
typedef pair<real, int> DoubleInt;
|
15 |
+
typedef pair<real, real> DoublePair;
|
16 |
+
typedef vector<IntPair> IntPairVec;
|
17 |
+
typedef vector<DoubleInt> DoubleIntVec;
|
18 |
+
typedef vector<bool> BoolVec;
|
19 |
+
typedef vector<int> IntVec;
|
20 |
+
typedef vector<string> StringVec;
|
21 |
+
typedef vector<IntVec> IntMat;
|
22 |
+
typedef vector<IntVec> IntVecVec;
|
23 |
+
typedef vector<IntVecVec> IntVecVecVec;
|
24 |
+
typedef vector<IntVecVecVec> IntVecVecVecVec;
|
25 |
+
typedef vector<real> DoubleVec;
|
26 |
+
typedef vector<DoubleVec> DoubleVecVec;
|
27 |
+
typedef vector<DoubleVecVec> DoubleVecVecVec;
|
28 |
+
typedef vector<DoubleVecVecVec> DoubleVecVecVecVec;
|
29 |
+
typedef vector<IntDouble> IntDoubleVec;
|
30 |
+
typedef vector<IntDoubleVec> IntDoubleVecVec;
|
31 |
+
typedef vector<IntDoubleVecVec> IntDoubleVecVecVec;
|
32 |
+
typedef vector<IntDoubleVecVecVec> IntDoubleVecVecVecVec;
|
33 |
+
|
34 |
+
typedef IntVec ivector;
|
35 |
+
typedef DoubleVec fvector;
|
36 |
+
typedef DoubleVecVec fmatrix;
|
37 |
+
|
38 |
+
////////////////////////////////////////////////////////////
|
39 |
+
|
40 |
+
struct vector_eq {
|
41 |
+
bool operator()(const IntVec &v1, const IntVec &v2) const {
|
42 |
+
return v1 == v2;
|
43 |
+
}
|
44 |
+
};
|
45 |
+
struct vector_hf {
|
46 |
+
size_t operator()(const IntVec &v) const {
|
47 |
+
return CityHash64(reinterpret_cast<const char*>(&v[0]), sizeof(int) * v.size());
|
48 |
+
#if 0
|
49 |
+
int h = 0;
|
50 |
+
foridx(i, len(v))
|
51 |
+
h = (h<<4)^(h>>28)^v[i];
|
52 |
+
return h;
|
53 |
+
#endif
|
54 |
+
}
|
55 |
+
};
|
56 |
+
|
57 |
+
struct pair_eq {
|
58 |
+
bool operator()(const IntPair &p1, const IntPair &p2) const {
|
59 |
+
return p1 == p2;
|
60 |
+
}
|
61 |
+
};
|
62 |
+
struct pair_hf {
|
63 |
+
size_t operator()(const IntPair &p) const {
|
64 |
+
return (p.first<<4)^(p.first>>28) ^ p.second;
|
65 |
+
}
|
66 |
+
};
|
67 |
+
|
68 |
+
struct str_eq {
|
69 |
+
bool operator()(const char *s1, const char *s2) const {
|
70 |
+
return strcmp(s1, s2) == 0;
|
71 |
+
}
|
72 |
+
};
|
73 |
+
struct str_hf {
|
74 |
+
size_t operator()(const char *s) const {
|
75 |
+
return CityHash64(s, strlen(s));
|
76 |
+
}
|
77 |
+
};
|
78 |
+
|
79 |
+
struct string_eq {
|
80 |
+
bool operator()(const string &s1, const string &s2) const {
|
81 |
+
return s1 == s2;
|
82 |
+
}
|
83 |
+
};
|
84 |
+
struct string_hf {
|
85 |
+
size_t operator()(const string &s) const {
|
86 |
+
return CityHash64(s.c_str(), s.size());
|
87 |
+
}
|
88 |
+
};
|
89 |
+
|
90 |
+
////////////////////////////////////////////////////////////
|
91 |
+
|
92 |
+
typedef unordered_set<int> IntSet;
|
93 |
+
typedef unordered_set<IntPair, pair_hf, pair_eq> IntPairSet;
|
94 |
+
typedef unordered_set<IntVec, vector_hf, vector_eq> IntVecSet;
|
95 |
+
typedef unordered_map<IntVec, real, vector_hf, vector_eq> IntVecDoubleMap;
|
96 |
+
typedef unordered_map<IntVec, int, vector_hf, vector_eq> IntVecIntMap;
|
97 |
+
typedef unordered_map<int, int> IntIntMap;
|
98 |
+
typedef unordered_map<int, real> IntDoubleMap;
|
99 |
+
typedef unordered_map<int, IntPair> IntIntPairMap;
|
100 |
+
typedef unordered_map<int, IntVec> IntIntVecMap;
|
101 |
+
typedef unordered_map<int, IntIntMap> IntIntIntMapMap;
|
102 |
+
typedef unordered_map<IntPair, int, pair_hf, pair_eq> IntPairIntMap;
|
103 |
+
typedef unordered_map<IntPair, real, pair_hf, pair_eq> IntPairDoubleMap;
|
104 |
+
typedef unordered_map<IntPair, DoubleVec, pair_hf, pair_eq> IntPairDoubleVecMap;
|
105 |
+
typedef unordered_map<IntVec, IntVec, vector_hf, vector_eq> IntVecIntVecMap;
|
106 |
+
typedef unordered_map<IntVec, DoubleVec, vector_hf, vector_eq> IntVecDoubleVecMap;
|
107 |
+
typedef vector<IntIntMap> IntIntMapVec;
|
108 |
+
|
109 |
+
typedef vector<const char *> StrVec;
|
110 |
+
typedef unordered_map<const char *, int, str_hf, str_eq> StrIntMap;
|
111 |
+
typedef unordered_map<const char *, const char *, str_hf, str_eq> StrStrMap;
|
112 |
+
|
113 |
+
#endif
|
basic/stl-utils.cc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
#include "stl-utils.h"
|
basic/stl-utils.h
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __STL_UTILS__
|
2 |
+
#define __STL_UTILS__
|
3 |
+
|
4 |
+
#include "stl-basic.h"
|
5 |
+
#include <stdarg.h>
|
6 |
+
|
7 |
+
#define contains(X, x) ((X).find(x) != (X).end())
|
8 |
+
|
9 |
+
inline void improve(DoubleInt &x, const DoubleInt &y) {
|
10 |
+
if(y.first > x.first) x = y; // Bigger is better.
|
11 |
+
}
|
12 |
+
|
13 |
+
template<class Compare> inline void improve(DoubleInt &x, const DoubleInt &y, Compare compare) {
|
14 |
+
if(compare(y.first, x.first)) x = y;
|
15 |
+
}
|
16 |
+
|
17 |
+
// Free up the memory in a vector or hash_map.
|
18 |
+
template<class T> void destroy(T &obj) {
|
19 |
+
T empty_obj;
|
20 |
+
obj.swap(empty_obj);
|
21 |
+
}
|
22 |
+
|
23 |
+
template<class T> int index_of(const vector<T> &vec, const T &x, int i0 = 0) {
|
24 |
+
for(int i = i0; i < len(vec); i++)
|
25 |
+
if(vec[i] == x) return i;
|
26 |
+
return -1;
|
27 |
+
}
|
28 |
+
|
29 |
+
template<class T> int count_of(const vector<T> &vec, const T &x) {
|
30 |
+
int n = 0;
|
31 |
+
forvec(_, const T &, y, vec)
|
32 |
+
if(x == y) n++;
|
33 |
+
return n;
|
34 |
+
}
|
35 |
+
|
36 |
+
// Get vec[i], but if i is out of range, expand the vector and fill
|
37 |
+
// everything with x.
|
38 |
+
// Return a reference to vec[i]. If i is past the end, first grow vec to
// i+1 elements, filling every new slot with x. i must be non-negative.
template<class T> T &expand_get(vector<T> &vec, int i, const T &x) {
  assert(i >= 0);
  // resize(n, x) copy-constructs the new elements from x directly, so no
  // default construction followed by assignment is needed.
  if((size_t)i >= vec.size()) vec.resize((size_t)i + 1, x);
  return vec[i];
}
|
46 |
+
// Matrix form of expand_get(): make sure row i exists (new rows start empty),
// then expand/get column j of that row, filling new cells with x.
template<class T> T &expand_get(vector< vector<T> > &mat, int i, int j, const T &x) {
  if(len(mat) <= i) mat.resize(i+1);
  vector<T> &row = mat[i];
  return expand_get(row, j, x);
}
|
51 |
+
// 3-D form of expand_get(): make sure slice i exists (new slices start empty),
// then delegate to the matrix form for cell (j, k), filling new cells with x.
template<class T> T &expand_get(vector< vector< vector<T> > > &mat, int i, int j, int k, const T &x) {
  if(len(mat) <= i) mat.resize(i+1);
  vector< vector<T> > &slice = mat[i];
  return expand_get(slice, j, k, x);
}
|
56 |
+
|
57 |
+
// Assuming this vector will not grow any more, compacting it can reduce
// memory usage.  This only helps after deletions; it is unnecessary when the
// vector never held more elements than it does now.
// Implemented by swapping the vector with a fresh copy of itself.
template<class T> void vector_compact(vector<T> &vec) {
  vector<T>(vec).swap(vec);
}
|
68 |
+
template<class T> void matrix_compact(vector< vector<T> > &mat) {
|
69 |
+
vector< vector<T> > new_mat(len(mat));
|
70 |
+
foridx(i, len(mat)) compact(mat[i]);
|
71 |
+
new_mat = mat;
|
72 |
+
mat.swap(new_mat);
|
73 |
+
}
|
74 |
+
|
75 |
+
// Append x (a default-constructed T when omitted) to vec and return a
// reference to the element just stored.
template<class T> inline T &push_back(vector<T> &vec, const T &x = T()) {
  vec.push_back(x);
  return vec.back();
}
|
80 |
+
|
81 |
+
// Resize mat to nr rows and give every row nc columns.
template<class T> inline void matrix_resize(vector< vector<T> > &mat, int nr, int nc) {
  mat.resize(nr);
  for(typename vector< vector<T> >::iterator row = mat.begin(); row != mat.end(); ++row)
    row->resize(nc);
}
|
85 |
+
|
86 |
+
// Resize a 3-D array to n1 x n2 x n3.
template<class T> inline void matrix_resize(vector< vector< vector<T> > > &mat, int n1, int n2, int n3) {
  typedef vector< vector<T> > Slice;
  mat.resize(n1);
  for(typename vector<Slice>::iterator slice = mat.begin(); slice != mat.end(); ++slice) {
    slice->resize(n2);
    for(typename Slice::iterator row = slice->begin(); row != slice->end(); ++row)
      row->resize(n3);
  }
}
|
94 |
+
|
95 |
+
// Build an nr x nc matrix whose cells all hold a copy of v.
template<class T> inline vector< vector<T> > new_matrix(int nr, int nc, T v) {
  const vector<T> row(nc, v);
  vector< vector<T> > mat(nr, row);
  return mat;
}
|
105 |
+
|
106 |
+
template<class T> inline void matrix_fill(vector< vector<T> > &mat, T v) {
|
107 |
+
foridx(i, len(mat)) vector_fill(mat[i], v);
|
108 |
+
}
|
109 |
+
|
110 |
+
// Overwrite every element of vec with v.
template<class T> inline void vector_fill(vector<T> &vec, T v) {
  for(typename vector<T>::iterator it = vec.begin(); it != vec.end(); ++it)
    *it = v;
}
|
113 |
+
|
114 |
+
// Sum of all elements, accumulated in a T that starts at 0.
template<class T> inline T vector_sum(const vector<T> &vec) {
  T total = 0;
  for(typename vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
    total += *it;
  return total;
}
|
119 |
+
|
120 |
+
// Returns the index of the minimum element in vec; ties go to the earliest
// index.  vec[0] is read unconditionally, so vec must not be empty.
template<class T> inline int vector_index_min(const vector<T> &vec) {
  T smallest = vec[0];
  int argmin = 0;
  for(int i = 1; i < len(vec); i++) {
    if(!(vec[i] < smallest)) continue;
    smallest = vec[i];
    argmin = i;
  }
  return argmin;
}
|
132 |
+
|
133 |
+
// Returns the minimum element of vec (vec must not be empty).
// The return type is T, not int, so non-int element types such as double are
// no longer converted to int on return.
template<class T> inline T vector_min(const vector<T> &vec) {
  return vec[vector_index_min(vec)];
}
|
136 |
+
|
137 |
+
// Returns the index of the maximum element in vec.
|
138 |
+
template<class T> inline intIndex vector_index_max(const vector<T> &vec) {
|
139 |
+
T max = vec[0];
|
140 |
+
int best_i = 0;
|
141 |
+
forsidx(i, len(vec)) {
|
142 |
+
if(vec[i] > max) {
|
143 |
+
max = vec[i];
|
144 |
+
best_i = i;
|
145 |
+
}
|
146 |
+
}
|
147 |
+
return best_i;
|
148 |
+
}
|
149 |
+
|
150 |
+
// Returns the maximum element of vec (vec must not be empty).
// The return type is T, not int, so non-int element types such as double are
// no longer converted to int on return.
template<class T> inline T vector_max(const vector<T> &vec) {
  return vec[vector_index_max(vec)];
}
|
153 |
+
|
154 |
+
// Returns the index of the maximum element in vec.
|
155 |
+
template<class T> inline IntPair matrix_index_max(const vector< vector<T> > &mat) {
|
156 |
+
T max = mat[0][0];
|
157 |
+
IntPair best_ij = IntPair(0, 0);
|
158 |
+
forsidx(i, len(mat)) {
|
159 |
+
forsidx(j, len(mat[i])) {
|
160 |
+
if(mat[i][j] > max) {
|
161 |
+
max = mat[i][j];
|
162 |
+
best_ij = IntPair(i, j);
|
163 |
+
}
|
164 |
+
}
|
165 |
+
}
|
166 |
+
return best_ij;
|
167 |
+
}
|
168 |
+
|
169 |
+
// Returns the sum of the elements in column c, taken over every row of mat.
template<class T> inline T matrix_col_sum(const vector< vector<T> > &mat, int c) {
  T total = 0;
  for(typename vector< vector<T> >::const_iterator row = mat.begin(); row != mat.end(); ++row)
    total += (*row)[c];
  return total;
}
|
175 |
+
|
176 |
+
// Print a pair as "first second" (single space between the members).
template<class T1, class T2> ostream &operator<<(ostream &out, const pair<T1, T2> &p) {
  out << p.first;
  out << ' ';
  out << p.second;
  return out;
}
|
179 |
+
|
180 |
+
// Print the elements of vec on one line, separated by single spaces (no
// leading or trailing separator, no newline).
template<class T> ostream &operator<<(ostream &out, const vector<T> &vec) {
  if(len(vec) == 0) return out;
  out << vec[0];
  for(int i = 1; i < len(vec); i++) {
    out << ' ';
    out << vec[i];
  }
  return out;
}
|
187 |
+
|
188 |
+
// Print a matrix one row per line; each row uses the vector operator<< and is
// followed by endl.
template<class T> ostream &operator<<(ostream &out, const vector< vector<T> > &mat) {
  for(typename vector< vector<T> >::const_iterator row = mat.begin(); row != mat.end(); ++row)
    out << *row << endl;
  return out;
}
|
192 |
+
|
193 |
+
template<class T> vector<T> subvector(const vector<T> &vec, intIndex i, intIndex j = -1) {
|
194 |
+
intIndex N = len(vec);
|
195 |
+
if(j < 0) j += N;
|
196 |
+
if(j < i) j = i;
|
197 |
+
|
198 |
+
// Probably some fancy STL way to do this.
|
199 |
+
vector<T> subvec(j-i);
|
200 |
+
foridx(k, j-i) subvec[k] = vec[i+k];
|
201 |
+
return subvec;
|
202 |
+
}
|
203 |
+
|
204 |
+
// Copy the first n elements of a plain array into a new vector.
template<class T> vector<T> to_vector(T arr[], int n) {
  vector<T> vec(n);
  for(int i = 0; i < n; i++)
    vec[i] = arr[i];
  return vec;
}
|
209 |
+
|
210 |
+
// Build an IntVec from n variadic arguments, each read with va_arg(.., int).
inline IntVec to_vector(int n, ...) {
  IntVec vec;
  va_list args;
  va_start(args, n);
  for(int k = 0; k < n; k++) {
    const int value = va_arg(args, int);
    vec.push_back(value);
  }
  va_end(args);
  return vec;
}
|
218 |
+
|
219 |
+
// Build a DoubleVec from n variadic arguments, each read with
// va_arg(.., double); an integer literal passed here is not converted.
inline DoubleVec to_fvector(int n, ...) {
  DoubleVec vec;
  va_list args;
  va_start(args, n);
  for(int k = 0; k < n; k++) {
    const double value = va_arg(args, double);
    vec.push_back(value);
  }
  va_end(args);
  return vec;
}
|
227 |
+
|
228 |
+
// Element-wise vec1[i] += vec2[i] for every index of vec1.  vec2 is indexed
// over the same range without a size check.
template<class T> inline void operator+=(vector<T> &vec1, const vector<T> &vec2) {
  for(int i = 0; i < len(vec1); i++)
    vec1[i] += vec2[i];
}
|
231 |
+
|
232 |
+
#endif
|
basic/str-str-db.cc
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "str-str-db.h"
|
2 |
+
#include "std.h"
|
3 |
+
#include "str.h"
|
4 |
+
#include "strdb.h"
|
5 |
+
|
6 |
+
// Frees every key and value string held in s2t (read() stores copy_str()
// copies of both).
StrStrDB::~StrStrDB()
{
  destroy_strings(s2t);
}
|
9 |
+
|
10 |
+
// File format: lines of <t>\t<s>\t<...junk...>
|
11 |
+
void StrStrDB::read(const char *file) {
|
12 |
+
track("StrStrDB::read()", file, true);
|
13 |
+
char buf[1024];
|
14 |
+
ifstream in(file);
|
15 |
+
assert2(in, file);
|
16 |
+
|
17 |
+
// Read the s2t for each word.
|
18 |
+
max_t_len = 0;
|
19 |
+
while(in.getline(buf, sizeof(buf))) {
|
20 |
+
char *t = strtok(buf, "\t");
|
21 |
+
char *s = strtok(NULL, "\t");
|
22 |
+
assert(s && t);
|
23 |
+
|
24 |
+
assert2(!contains(s2t, s), s << " appears twice");
|
25 |
+
s2t[copy_str(s)] = copy_str(t);
|
26 |
+
max_t_len = max(max_t_len, (int)strlen(t));
|
27 |
+
}
|
28 |
+
logs("Read " << len(s2t) << " strings");
|
29 |
+
logs("Longest mapped string is " << max_t_len << " characters.");
|
30 |
+
}
|
31 |
+
|
32 |
+
// Look up the string mapped to word; an unknown word yields the empty string
// "" rather than NULL.
const char *StrStrDB::operator[](const char *word) const {
  StrStrMap::const_iterator it = s2t.find(word);
  if(it != s2t.end())
    return it->second;
  return "";
}
|
basic/str-str-db.h
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __STR_STR_DB_H__
|
2 |
+
#define __STR_STR_DB_H__
|
3 |
+
|
4 |
+
#include "stl-basic.h"
|
5 |
+
|
6 |
+
// Maps strings (s) to strings (t).
|
7 |
+
class StrStrDB {
|
8 |
+
public:
|
9 |
+
~StrStrDB();
|
10 |
+
|
11 |
+
void read(const char *file);
|
12 |
+
const char *operator[](const char *s) const;
|
13 |
+
|
14 |
+
int max_t_len;
|
15 |
+
private:
|
16 |
+
StrStrMap s2t;
|
17 |
+
};
|
18 |
+
|
19 |
+
#endif
|
basic/str.cc
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "stl-basic.h"
|
2 |
+
#include <stdarg.h>
|
3 |
+
|
4 |
+
string substr(const string &s, int i, int j) {
|
5 |
+
if(i < 0) i += len(s);
|
6 |
+
if(j < 0) j += len(s);
|
7 |
+
i = max(i, 0);
|
8 |
+
j = max(j, i);
|
9 |
+
return s.substr(i, j-i);
|
10 |
+
}
|
11 |
+
string substr(const string &s, int i) {
|
12 |
+
return substr(s, i, len(s));
|
13 |
+
}
|
14 |
+
|
15 |
+
// printf-style formatting into a string.  The text is produced with
// vsnprintf() into a 16384-byte buffer, so longer results are truncated.
string str_printf(const char *fmt, ...) {
  char text[16384];
  va_list args;
  va_start(args, fmt);
  vsnprintf(text, sizeof(text), fmt, args);
  va_end(args);
  return string(text);
}
|
23 |
+
|
24 |
+
// Heap-allocated copy of the C string s.  The copy is made with new[], so it
// must be released with delete [].
char *copy_str(const char *s) {
  const size_t n = strlen(s) + 1;  // +1 for the terminating NUL
  char *t = new char[n];
  memcpy(t, s, n);
  return t;
}
|
29 |
+
|
30 |
+
string int2str(int x) {
|
31 |
+
return str_printf("%d", x);
|
32 |
+
}
|
33 |
+
|
34 |
+
// Text of x as produced by streaming it into a default-formatted
// ostringstream.
string double2str(double x) {
  ostringstream buffer;
  buffer << x;
  const string text = buffer.str();
  return text;
}
|
39 |
+
|
40 |
+
// Split str at every character that occurs in delims and return the tokens.
// Empty tokens (between adjacent delimiters, or at either end) are kept only
// when keep_empty is true.
StringVec split(const char *str, const char *delims, bool keep_empty) {
  StringVec vec;  // Store the result.
  // Build quick lookup table, indexed by byte value.  The index must go
  // through unsigned char: plain char may be signed, and bytes >= 0x80 (e.g.
  // UTF-8 text) would otherwise give a negative index.
  BoolVec is_delim(256);
  for(const char *p = delims; *p; p++) is_delim[(unsigned char)*p] = true;
  is_delim[(unsigned char)'\0'] = true;  // the terminator ends the last token

  const char *end = str;
  while(true) {
    if(is_delim[(unsigned char)*end]) {
      if(keep_empty || end-str > 0)  // Extract token.
        vec.push_back(string(str, end-str));
      str = end+1;
    }
    if(!*end) break;
    end++;
  }
  return vec;
}
|
59 |
+
|
60 |
+
// Tokenise str in place with strtok() and return pointers to the tokens.
// str is modified, and the returned pointers point into it.
StrVec mutate_split(char *str, const char *delims) {
  StrVec vec;
  char *tok = strtok(str, delims);
  while(tok) {
    vec.push_back(tok);
    tok = strtok(NULL, delims);
  }
  return vec;
}
|
66 |
+
|
67 |
+
// Remove leading and trailing white space, in place.
// Returns a pointer to the first non-space character inside s; the string is
// cut by writing a '\0' after the last non-space character.
// Empty and all-space inputs yield "" (the original scanned backwards past
// the start of the text in those cases).
char *trim(char *s) {
  // Skip leading spaces.  isspace() needs an unsigned char value; '\0' is
  // not a space, so the loop stops at the terminator.
  while(isspace((unsigned char)*s)) s++;

  // Walk back from the terminator over trailing spaces, never past s.
  char *end = s + strlen(s);
  while(end > s && isspace((unsigned char)end[-1])) end--;
  *end = '\0';
  return s;
}
|
78 |
+
|
79 |
+
// Copy of s with every character passed through the character-level
// tolower().
string tolower(const char *s) {
  string lowered = s;
  for(int i = 0; i < len(lowered); i++) {
    const char c = lowered[i];
    lowered[i] = tolower(c);
  }
  return lowered;
}
|
84 |
+
|
85 |
+
// Offset of the first occurrence of t inside s, or -1 when t does not occur.
// An empty t matches at offset 0.  Uses the standard library's strstr()
// instead of a hand-written scan.
int index_of(const char *s, const char *t) {
  const char *hit = strstr(s, t);
  return hit ? (int)(hit - s) : -1;
}
|
basic/str.h
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __STR_H__
|
2 |
+
#define __STR_H__
|
3 |
+
|
4 |
+
#include "stl-basic.h"
|
5 |
+
|
6 |
+
string substr(const string &s, int i, int j);
|
7 |
+
string substr(const string &s, int i);
|
8 |
+
|
9 |
+
string str_printf(const char *fmt, ...);
|
10 |
+
char *copy_str(const char *s);
|
11 |
+
string int2str(int x);
|
12 |
+
string double2str(double x);
|
13 |
+
|
14 |
+
StringVec split(const char *str, const char *delims, bool keep_empty);
|
15 |
+
StrVec mutate_split(char *str, const char *delims);
|
16 |
+
|
17 |
+
char *trim(char *s);
|
18 |
+
string tolower(const char *s);
|
19 |
+
|
20 |
+
int index_of(const char *s, const char *t);
|
21 |
+
|
22 |
+
#endif
|
basic/strdb.cc
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "strdb.h"
|
2 |
+
#include "str.h"
|
3 |
+
|
4 |
+
// delete [] every string in vec.  The vector itself is left untouched, so it
// still holds the (now dangling) pointers afterwards.
void destroy_strings(StrVec &vec) {
  for(int i = 0; i < len(vec); i++)
    delete [] vec[i];
}
|
8 |
+
|
9 |
+
// delete [] every key and value string of map.  The pointers are first
// gathered into a vector and freed afterwards; the map's entries are not
// removed.
void destroy_strings(StrStrMap &map) {
  StrVec strs;
  for(StrStrMap::const_iterator it = map.begin(); it != map.end(); ++it) {
    strs.push_back(it->first);
    strs.push_back(it->second);
  }
  destroy_strings(strs);
}
|
18 |
+
|
19 |
+
////////////////////////////////////////////////////////////
|
20 |
+
|
21 |
+
int StrDB::read(istream &in, int N, bool one_way) {
|
22 |
+
char s[16384];
|
23 |
+
clear();
|
24 |
+
while(size() < N && in >> s) {
|
25 |
+
if(one_way) i2s.push_back(copy_str(s));
|
26 |
+
else (*this)[s];
|
27 |
+
}
|
28 |
+
logs(size() << " strings read");
|
29 |
+
return size();
|
30 |
+
}
|
31 |
+
|
32 |
+
int StrDB::read(const char *file, bool one_way) {
|
33 |
+
track("StrDB::read()", file << ", one_way=" << one_way, true);
|
34 |
+
ifstream in(file);
|
35 |
+
assert(in);
|
36 |
+
return read(in, INT_MAX, one_way);
|
37 |
+
}
|
38 |
+
|
39 |
+
void StrDB::write(ostream &out) {
|
40 |
+
foridx(i, size())
|
41 |
+
out << i2s[i] << endl;
|
42 |
+
logs(size() << " strings written");
|
43 |
+
}
|
44 |
+
|
45 |
+
void StrDB::write(const char *file) {
|
46 |
+
track("StrDB::write()", file, true);
|
47 |
+
ofstream out(file);
|
48 |
+
write(out);
|
49 |
+
}
|
50 |
+
|
51 |
+
// The string with id i; asserts that i is a valid id.
const char *StrDB::operator[](int i) const {
  assert(0 <= i && i < len(i2s));
  return i2s[i];
}
|
55 |
+
|
56 |
+
int StrDB::lookup(const char *s, bool incorp_new, int default_i) {
|
57 |
+
StrIntMap::const_iterator it = s2i.find(s);
|
58 |
+
if(it != s2i.end()) return it->second;
|
59 |
+
if(incorp_new) {
|
60 |
+
char *t = copy_str(s);
|
61 |
+
int i = s2i[t] = len(i2s);
|
62 |
+
i2s.push_back(t);
|
63 |
+
return i;
|
64 |
+
}
|
65 |
+
else
|
66 |
+
return default_i;
|
67 |
+
}
|
68 |
+
|
69 |
+
// Map every string of svec to its id, adding strings that are not yet known.
IntVec StrDB::lookup(const StrVec &svec) {
  const int n = len(svec);
  IntVec ivec(n);
  for(int i = 0; i < n; i++) {
    const int id = lookup(svec[i], true, -1);
    ivec[i] = id;
  }
  return ivec;
}
|
75 |
+
|
76 |
+
// Read-only lookup: the id of s, or -1 when s is unknown.
int StrDB::operator[](const char *s) const {
  StrIntMap::const_iterator it = s2i.find(s);
  return it == s2i.end() ? -1 : it->second;
}
|
81 |
+
|
82 |
+
// Non-const lookup: the id of s, adding s to the database if it is new.
int StrDB::operator[](const char *s) {
  const bool incorp_new = true;
  return lookup(s, incorp_new, -1);
}
|
85 |
+
|
86 |
+
ostream &operator<<(ostream &out, const StrDB &db) {
|
87 |
+
foridx(i, len(db)) out << db[i] << endl;
|
88 |
+
return out;
|
89 |
+
}
|
90 |
+
|
91 |
+
////////////////////////////////////////////////////////////
|
92 |
+
|
93 |
+
int IntPairIntDB::lookup(const IntPair &p, bool incorp_new, int default_i) {
|
94 |
+
IntPairIntMap::const_iterator it = p2i.find(p);
|
95 |
+
if(it != p2i.end()) return it->second;
|
96 |
+
|
97 |
+
if(incorp_new) {
|
98 |
+
int i = p2i[p] = len(i2p);
|
99 |
+
i2p.push_back(p);
|
100 |
+
return i;
|
101 |
+
}
|
102 |
+
else
|
103 |
+
return default_i;
|
104 |
+
}
|
105 |
+
|
106 |
+
int IntPairIntDB::read(istream &in, int N) {
|
107 |
+
assert(size() == 0);
|
108 |
+
int a, b;
|
109 |
+
while(size() < N && in >> a >> b)
|
110 |
+
(*this)[IntPair(a, b)];
|
111 |
+
return size();
|
112 |
+
}
|
113 |
+
|
114 |
+
void IntPairIntDB::write(ostream &out) {
|
115 |
+
forvec(_, const IntPair &, p, i2p)
|
116 |
+
out << p.first << ' ' << p.second << endl;
|
117 |
+
}
|
118 |
+
|
119 |
+
////////////////////////////////////////////////////////////
|
120 |
+
|
121 |
+
int IntVecIntDB::lookup(const IntVec &v, bool incorp_new, int default_i) {
|
122 |
+
IntVecIntMap::const_iterator it = v2i.find(v);
|
123 |
+
if(it != v2i.end()) return it->second;
|
124 |
+
|
125 |
+
if(incorp_new) {
|
126 |
+
int i = v2i[v] = len(i2v);
|
127 |
+
i2v.push_back(v);
|
128 |
+
return i;
|
129 |
+
}
|
130 |
+
else
|
131 |
+
return default_i;
|
132 |
+
}
|
133 |
+
|
134 |
+
////////////////////////////////////////////////////////////
|
135 |
+
|
136 |
+
// A text is basically a string of words.
|
137 |
+
// Normally, we just read the strings from file, put them in db,
|
138 |
+
// and call back func.
|
139 |
+
// But if the db already exists and the strings have been converted
|
140 |
+
// into integers (i.e., <file>.{strdb,int} exist), then use those.
|
141 |
+
// If incorp_new is false, then words not in db will just get passed -1.
|
142 |
+
typedef void int_func(int a);
|
143 |
+
void read_text(const char *file, int_func *func, StrDB &db, bool read_cached, bool write_cached, bool incorp_new) {
|
144 |
+
track("read_text()", file, true);
|
145 |
+
|
146 |
+
string strdb_file = string(file)+".strdb";
|
147 |
+
string int_file = string(file)+".int";
|
148 |
+
|
149 |
+
// Use the cached strdb and int files only if they exist and they are
|
150 |
+
// newer than the text file.
|
151 |
+
read_cached &= file_exists(strdb_file.c_str()) &&
|
152 |
+
file_exists(int_file.c_str()) &&
|
153 |
+
file_modified_time(strdb_file.c_str()) > file_modified_time(file) &&
|
154 |
+
file_modified_time(int_file.c_str()) > file_modified_time(file);
|
155 |
+
|
156 |
+
if(read_cached) {
|
157 |
+
// Read from strdb and int.
|
158 |
+
assert(db.size() == 0); // db must be empty because we're going to clobber it all
|
159 |
+
db.read(strdb_file.c_str(), true);
|
160 |
+
track_block("", "Reading from " << int_file, false) {
|
161 |
+
ifstream in(int_file.c_str());
|
162 |
+
char buf[16384];
|
163 |
+
while(true) {
|
164 |
+
in.read(buf, sizeof(buf));
|
165 |
+
if(in.gcount() == 0) break;
|
166 |
+
assert(in.gcount() % sizeof(int) == 0);
|
167 |
+
for(int buf_i = 0; buf_i < in.gcount(); buf_i += 4) {
|
168 |
+
int a = *((int *)(buf+buf_i));
|
169 |
+
assert(a >= 0 && a < db.size());
|
170 |
+
func(a);
|
171 |
+
}
|
172 |
+
}
|
173 |
+
}
|
174 |
+
}
|
175 |
+
else {
|
176 |
+
track_block("", "Reading from " << file, false) {
|
177 |
+
// Write to strdb and int.
|
178 |
+
ifstream in(file);
|
179 |
+
ofstream out;
|
180 |
+
|
181 |
+
if(write_cached) {
|
182 |
+
out.open(int_file.c_str());
|
183 |
+
if(!out) write_cached = false;
|
184 |
+
}
|
185 |
+
if(write_cached) logs("Writing to " << int_file);
|
186 |
+
|
187 |
+
char s[16384];
|
188 |
+
char buf[16384]; int buf_i = 0; // Output buffer
|
189 |
+
while(in >> s) { // Read a string
|
190 |
+
int a = db.lookup(s, incorp_new, -1);
|
191 |
+
if(func) func(a);
|
192 |
+
|
193 |
+
if(write_cached) {
|
194 |
+
if(buf_i + sizeof(a) > sizeof(buf)) { // Flush buffer if full
|
195 |
+
out.write(buf, buf_i);
|
196 |
+
buf_i = 0;
|
197 |
+
}
|
198 |
+
*((int *)(buf+buf_i)) = a;
|
199 |
+
buf_i += sizeof(a);
|
200 |
+
}
|
201 |
+
}
|
202 |
+
if(write_cached) // Final flush
|
203 |
+
out.write(buf, buf_i);
|
204 |
+
}
|
205 |
+
|
206 |
+
if(write_cached && create_file(strdb_file.c_str()))
|
207 |
+
db.write(strdb_file.c_str());
|
208 |
+
}
|
209 |
+
}
|
basic/strdb.h
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __STRDB_H__
|
2 |
+
#define __STRDB_H__
|
3 |
+
|
4 |
+
#include "std.h"
|
5 |
+
#include "stl-basic.h"
|
6 |
+
#include "stl-utils.h"
|
7 |
+
#include "logging.h"
|
8 |
+
|
9 |
+
void destroy_strings(StrVec &vec);
|
10 |
+
void destroy_strings(StrStrMap &map);
|
11 |
+
|
12 |
+
// Map between strings and integers.
|
13 |
+
// Strings must not have spaces in them.
|
14 |
+
// File format: strings, one per line. Assume strings are distinct.
|
15 |
+
struct StrDB {
|
16 |
+
StrDB() { }
|
17 |
+
~StrDB() { destroy_strings(); }
|
18 |
+
|
19 |
+
int read(istream &in, int n, bool one_way);
|
20 |
+
int read(const char *file, bool one_way);
|
21 |
+
|
22 |
+
void write(ostream &out);
|
23 |
+
void write(const char *file);
|
24 |
+
|
25 |
+
intIndex size() const { return len(i2s); }
|
26 |
+
void clear() { destroy_strings(); i2s.clear(); s2i.clear(); }
|
27 |
+
void destroy() { destroy_strings(); ::destroy(i2s); ::destroy(s2i); }
|
28 |
+
void destroy_s2i() { ::destroy(s2i); }
|
29 |
+
void clear_keep_strings() { i2s.clear(); s2i.clear(); }
|
30 |
+
|
31 |
+
const char *operator[](int i) const;
|
32 |
+
int operator[](const char *s) const;
|
33 |
+
int operator[](const char *s);
|
34 |
+
int lookup(const char *s, bool incorp_new, int default_i);
|
35 |
+
|
36 |
+
IntVec lookup(const StrVec &svec);
|
37 |
+
|
38 |
+
bool exists(const char *s) const { return s2i.find(s) != s2i.end(); }
|
39 |
+
|
40 |
+
// /usr/bin/top might not show the memory reduced.
|
41 |
+
void destroy_strings() { ::destroy_strings(i2s); }
|
42 |
+
|
43 |
+
StrVec i2s;
|
44 |
+
StrIntMap s2i;
|
45 |
+
};
|
46 |
+
|
47 |
+
ostream &operator<<(ostream &out, const StrDB &db);
|
48 |
+
|
49 |
+
////////////////////////////////////////////////////////////
|
50 |
+
|
51 |
+
// Map between IntPairs and ints.
|
52 |
+
struct IntPairIntDB {
|
53 |
+
IntPair operator[](int i) const { return i2p[i]; }
|
54 |
+
int operator[](const IntPair &p) { return lookup(p, true, -1); }
|
55 |
+
int lookup(const IntPair &p, bool incorp_new, int default_i);
|
56 |
+
intIndex size() const { return len(i2p); }
|
57 |
+
|
58 |
+
int read(istream &in, int N);
|
59 |
+
void write(ostream &out);
|
60 |
+
|
61 |
+
IntPairIntMap p2i;
|
62 |
+
IntPairVec i2p;
|
63 |
+
};
|
64 |
+
|
65 |
+
////////////////////////////////////////////////////////////
|
66 |
+
|
67 |
+
// Map between IntVecs and ints.
|
68 |
+
struct IntVecIntDB {
|
69 |
+
const IntVec &operator[](int i) const { return i2v[i]; }
|
70 |
+
int operator[](const IntVec &v) { return lookup(v, true, -1); }
|
71 |
+
int lookup(const IntVec &v, bool incorp_new, int default_i);
|
72 |
+
intIndex size() const { return len(i2v); }
|
73 |
+
|
74 |
+
IntVecIntMap v2i;
|
75 |
+
IntVecVec i2v;
|
76 |
+
};
|
77 |
+
|
78 |
+
////////////////////////////////////////////////////////////
|
79 |
+
|
80 |
+
#if 0
|
81 |
+
// Map between IntArrays and ints. Arrays terminate with -1.
|
82 |
+
struct IntArrayIntDB {
|
83 |
+
int *operator[](int i) const { return i2a[i]; }
|
84 |
+
int operator[](const IntArray &a) { return lookup(a, true, -1); }
|
85 |
+
int lookup(const IntArray &a, bool incorp_new, int default_i);
|
86 |
+
int size() const { return len(i2a); }
|
87 |
+
|
88 |
+
int read(istream &in, int N);
|
89 |
+
void write(ostream &out);
|
90 |
+
|
91 |
+
hash_map<int *, int, intarray_hf, intarray_eq> p2i;
|
92 |
+
vector<int *> i2a;
|
93 |
+
};
|
94 |
+
#endif
|
95 |
+
|
96 |
+
////////////////////////////////////////////////////////////
|
97 |
+
|
98 |
+
typedef void int_func(int a);
|
99 |
+
void read_text(const char *file, int_func *func, StrDB &db, bool read_cached, bool write_cached, bool incorp_new);
|
100 |
+
|
101 |
+
#endif
|
basic/timer.cc
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "timer.h"
|
2 |
+
|
3 |
+
ostream &operator<<(ostream &out, const Timer &timer) {
|
4 |
+
int ms = timer.ms;
|
5 |
+
int m = ms / 60000; ms %= 60000;
|
6 |
+
int h = m / 60; m %= 60;
|
7 |
+
if(h > 0) out << h << 'h';
|
8 |
+
if(h > 0 || m > 0) out << m << 'm';
|
9 |
+
out << ms/1000.0 << 's';
|
10 |
+
return out;
|
11 |
+
}
|
basic/timer.h
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __TIMER_H__
|
2 |
+
#define __TIMER_H__
|
3 |
+
|
4 |
+
#include <sys/types.h>
|
5 |
+
#include <sys/time.h>
|
6 |
+
#include <time.h>
|
7 |
+
#include <iostream>
|
8 |
+
|
9 |
+
using namespace std;
|
10 |
+
|
11 |
+
// Wall-clock stopwatch built on gettimeofday().  After start() ... stop(),
// ms holds the elapsed time in milliseconds.
struct Timer {
  Timer() : ms(0) { }  // nothing measured yet
  Timer(int ms) : ms(ms) { }

  void start() { gettimeofday(&start_time, NULL); }
  Timer &stop() {
    gettimeofday(&end_time, NULL);
    // Subtract the seconds before scaling to milliseconds, so the epoch
    // value itself is never squeezed into an int (see to_ms()).
    ms = (int)((end_time.tv_sec - start_time.tv_sec)*1000
               + (end_time.tv_usec/1000 - start_time.tv_usec/1000));
    return *this;
  }
  // Milliseconds of tv as an int.  The result does not fit in a 32-bit int
  // for present-day epoch times, so use it for differences only with care.
  static int to_ms(const timeval &tv) { return tv.tv_sec*1000 + tv.tv_usec/1000; }

  timeval start_time;
  timeval end_time;
  int ms;
};
|
32 |
+
|
33 |
+
ostream &operator<<(ostream &out, const Timer &timer);
|
34 |
+
|
35 |
+
#endif
|
basic/union-set.cc
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "union-set.h"
|
2 |
+
|
3 |
+
// Reset to n singleton sets: every element 0..n-1 becomes its own parent.
void UnionSet::Init(int n) {
  parent.resize(n);
  int v = 0;
  while(v < n) {
    parent[v] = v;
    v++;
  }
}
|
8 |
+
|
9 |
+
// return whether u and v are in the same connected component;
|
10 |
+
// connect them if they aren't
|
11 |
+
bool UnionSet::Do(int u, int v, bool doit) {
|
12 |
+
int ru = GetRoot(u);
|
13 |
+
int rv = GetRoot(v);
|
14 |
+
if(ru == rv) return true;
|
15 |
+
if(doit) parent[ru] = rv;
|
16 |
+
return false;
|
17 |
+
}
|
18 |
+
|
19 |
+
int UnionSet::GetRoot(int v) {
|
20 |
+
int rv = v;
|
21 |
+
while(parent[rv] != rv)
|
22 |
+
rv = parent[rv];
|
23 |
+
while(v != rv) {
|
24 |
+
int pv = parent[v];
|
25 |
+
parent[v] = rv;
|
26 |
+
v = pv;
|
27 |
+
}
|
28 |
+
return rv;
|
29 |
+
}
|
basic/union-set.h
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#ifndef __UNION_SET_H__
|
2 |
+
#define __UNION_SET_H__
|
3 |
+
|
4 |
+
#include <vector>
|
5 |
+
|
6 |
+
using namespace std;
|
7 |
+
|
8 |
+
// Disjoint-set forest over the integers 0..n-1; parent[v] == v marks a root.
struct UnionSet {
  UnionSet() { }
  UnionSet(int n) { Init(n); }
  void Init(int n);

  // Merge the sets of u and v; returns whether they were already joined.
  bool Join(int u, int v) {
    return Do(u, v, true);
  }
  // Query only: are u and v in the same set?
  bool InSameSet(int u, int v) {
    return Do(u, v, false);
  }

  bool Do(int u, int v, bool doit);
  int GetRoot(int v);

  vector<int> parent;
};
|
21 |
+
|
22 |
+
#endif
|
cluster-viewer/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
The MIT License (MIT)
|
2 |
+
|
3 |
+
Copyright (c) 2014 Chris Dyer and Brendan O'Connor
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
22 |
+
|
cluster-viewer/README.md
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
This code generates an HTML viewer for the clustering tree generated, similar to [this clustering of the words in a corpus of English Twitter data](http://www.ark.cs.cmu.edu/TweetNLP/cluster_viewer.html).
|
2 |
+
|
3 |
+
## Instructions
|
4 |
+
|
5 |
+
The `wcluster` tool generates a directory with a file called `paths` that contains the bit string representations of the clustering tree, e.g.
|
6 |
+
|
7 |
+
000000 Westfalenpokalfinale 10
|
8 |
+
000000 Heimpunktspiel 10
|
9 |
+
000000 Jugendhallenturnier 10
|
10 |
+
...
|
11 |
+
|
12 |
+
The script `cluster-viewer/build-viewer.sh` creates an HTML visualization of the contents of this file. You can run it as follows:
|
13 |
+
|
14 |
+
./cluster-viewer/build-viewer.sh corpus.out/paths
|
15 |
+
|
16 |
+
This command creates a directory called `clusters/` containing the HTML viewer. Specify an alternative directory as follows:
|
17 |
+
|
18 |
+
./cluster-viewer/build-viewer.sh corpus.out/paths /some/other/output-dir
|
19 |
+
|
20 |
+
## Requirements
|
21 |
+
|
22 |
+
* Python must be in your path
|
23 |
+
|
24 |
+
## Acknowledgements
|
25 |
+
|
26 |
+
These scripts were originally written by [Brendan O'Connor](http://brenocon.com/) and extended by [Chris Dyer](http://www.cs.cmu.edu/~cdyer/).
|
cluster-viewer/build-viewer.sh
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Build a static HTML viewer for a wcluster `paths` file.
#
# Usage: build-viewer.sh path/to/clusters.out/paths [outdir]
#   paths   - cluster paths file; a name ending in .gz is read through gunzip
#   outdir  - output directory (default: clusters)
set -e
set -o pipefail  # a failing cat/gunzip must abort the build too

CODEDIR="$(dirname "$0")/code"

if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]
then
  echo "Usage: $0 path/to/clusters.out/paths [outdir]" 1>&2
  echo 1>&2
  echo "Builds an HTML cluster viewer." 1>&2
  echo 1>&2
  exit 1  # wrong usage is an error, so report it through the exit status
fi

MAPFILE="$1"

# Command used to stream the paths file, kept as an array so that the
# two-word "gunzip -c" survives quoting.
CATCMD=(cat)
if [[ "$MAPFILE" == *.gz ]]
then
  CATCMD=(gunzip -c)
fi

OUTDIR=clusters
if [ "$#" -eq 2 ]
then
  OUTDIR="$2"
fi

echo "Creating output in $OUTDIR ..." 1>&2
mkdir -p "$OUTDIR"
mkdir -p "$OUTDIR/paths"
"${CATCMD[@]}" "$MAPFILE" | python "$CODEDIR/make_html.py" "$CODEDIR" "$OUTDIR" > "$OUTDIR/htmlrows.html"
python "$CODEDIR/final.py" "$CODEDIR" "$OUTDIR" > "$OUTDIR/cluster_viewer.html"
echo "Done. View clusters in $OUTDIR/cluster_viewer.html" 1>&2
|
32 |
+
|
cluster-viewer/code/final.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys


def read_text(path):
    """Return the whole content of the file at path, closing it afterwards."""
    with open(path) as handle:
        return handle.read()


# argv[1]: directory holding template.html and style.css
# argv[2]: output directory holding the generated htmlrows.html
template = read_text(sys.argv[1] + '/template.html')
final = template
final = final.replace('STYLE', read_text(sys.argv[1] + '/style.css'))
htmlrows = read_text(sys.argv[2] + '/htmlrows.html')
final = final.replace('TABLE', htmlrows)
print(final)
|
8 |
+
|
cluster-viewer/code/htmlrows.html
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
<tr>
|
3 |
+
<td class=path>^<a target=_blank href="paths/000000.html">000000</a> <span class=count>(3)</span>
|
4 |
+
<td class=words><span class=w>Westfalenpokalfinale</span> <span class=w>Heimpunktspiel</span> <span class=w>Jugendhallenturnier</span>
|
5 |
+
|
6 |
+
</tr>
|
7 |
+
|
8 |
+
<tr>
|
9 |
+
<td class=path>^<a target=_blank href="paths/0000010.html">0000010</a> <span class=count>(3)</span>
|
10 |
+
<td class=words><span class=w>Friesendorf</span> <span class=w>Fallenstellen</span> <span class=w>Strafjustizsystem</span>
|
11 |
+
|
12 |
+
</tr>
|
13 |
+
|
14 |
+
<tr>
|
15 |
+
<td class=path>^<a target=_blank href="paths/00000110.html">00000110</a> <span class=count>(3)</span>
|
16 |
+
<td class=words><span class=w>Gewerbeflächenkonzept</span> <span class=w>Musikprotokoll</span> <span class=w>Familienbetreuungszentrum</span>
|
17 |
+
|
18 |
+
</tr>
|
cluster-viewer/code/make_html.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys,itertools
|
2 |
+
|
3 |
+
# Stylesheet text from the code directory (sys.argv[1]); it is inlined into
# each per-cluster page. Read inside a `with` so the file is closed promptly.
with open(sys.argv[1] + '/style.css') as css_file:
    style = css_file.read()
|
4 |
+
|
5 |
+
def get_word_rows():
    """Yield (path, word, count) tuples parsed from stdin.

    Each input line holds three tab-separated fields: cluster path, word and
    an integer count. Blank lines (such as a trailing empty line) are skipped
    instead of aborting the run with an unpacking error.
    """
    for line in sys.stdin:
        if not line.strip():
            continue
        path, word, count = line.split('\t')
        # int() tolerates the newline still attached to the last field.
        count = int(count)
        yield path,word,count
|
10 |
+
|
11 |
+
def get_cluster_rows():
    """Yield one (path, word type count, first 50 pairs, all pairs) tuple per cluster.

    Consecutive rows from get_word_rows() that share a path form one cluster.
    The (word, count) pairs are ordered by descending count; the last element
    is the full list and the third is a copy of its first 50 entries.
    """
    clusters = itertools.groupby(get_word_rows(), key=lambda row: row[0])
    for path, members in clusters:
        pairs = sorted(((word, n) for _, word, n in members),
                       key=lambda pair: -pair[1])
        yield path, len(pairs), pairs[:50], pairs
|
17 |
+
|
18 |
+
def htmlescape(s):
    """Return `s` with the HTML-special characters &, < and > replaced by entities.

    The ampersand is handled first so that the entities produced for < and >
    are not escaped a second time.
    """
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
|
20 |
+
|
21 |
+
def wc_table(wordcounts, tdword=''):
    """Render (word, count) pairs as an HTML table with one numbered row per pair.

    Rows are numbered from 1. `tdword` becomes the class attribute of each
    word cell, the word is passed through htmlescape(), and the count is
    formatted with thousands separators.
    """
    row_template = '<tr><td>{} <td class="{}">{} <td class=tdcount>{:,}'
    body = [
        row_template.format(rank, tdword, htmlescape(word), count)
        for rank, (word, count) in enumerate(wordcounts, 1)
    ]
    return '\n'.join(['<table>'] + body + ['</table>'])
|
27 |
+
|
28 |
+
def top(wc, th):
    """Return the (word, count) pairs of `wc` whose count exceeds the cutoff.

    The cutoff is int(wc[0][1] * th), i.e. the fraction `th` of the first
    pair's count, truncated to an integer. Input order is preserved. An empty
    `wc` yields an empty list instead of raising IndexError.
    """
    if not wc:
        return []
    cutoff = int(wc[0][1] * th)
    return [(w, c) for (w, c) in wc if c > cutoff]
|
34 |
+
|
35 |
+
# For every cluster: print its summary row to stdout and write a detail page
# to <outdir>/paths/<path>.html, where <outdir> is sys.argv[2].
for path, nwords, wordcounts, allwc in get_cluster_rows():
    # Summary row: only the words kept by top(wordcounts, 0.01) are listed.
    spans = ["<span class=w>{w}</span>".format(w=htmlescape(word))
             for word, _ in top(wordcounts, 0.01)]
    wc1 = ' '.join(spans)

    print("""
<tr>
<td class=path>^<a target=_blank href="paths/{path}.html">{path}</a> <span class=count>({nwords})</span>
<td class=words>{wc}
""".format(path=path, nwords=nwords, wc=wc1))
    print("</tr>")

    # Detail page: the same word list in three orderings.
    total_tokens = sum(count for _, count in allwc)
    by_freq = sorted(allwc, key=lambda pair: (-pair[1], pair[0]))
    by_alpha = sorted(allwc, key=lambda pair: (pair[0], -pair[1]))
    # Suffix order compares the words read back to front.
    by_suffix = sorted(allwc, key=lambda pair: (pair[0][::-1], -pair[1]))

    page = [
        """<style>{style}</style>""".format(style=style),
        """<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">""",
        "<a href=../cluster_viewer.html>back to cluster viewer</a>",
        "<h1>cluster path {path}</h1>".format(path=path),
        "{n:,} words, {t:,} tokens".format(n=nwords, t=total_tokens),
        "<a href='#freq'>freq</a> <a href='#alpha'>alpha</a> <a href='#suffix'>suffix</a>",
        "<a name=freq><h2>Words in frequency order</h2></a>",
        wc_table(by_freq),
        "<a name=alpha><h2>Words in alphabetical order</h2></a>",
        wc_table(by_alpha),
        "<a name=suffix><h2>Words in suffix order</h2></a>",
        wc_table(by_suffix, tdword='suffixsort'),
    ]

    with open(sys.argv[2] + '/paths/{path}.html'.format(path=path), 'w') as f:
        for chunk in page:
            print(chunk, file=f)
|
74 |
+
|
75 |
+
|
cluster-viewer/code/style.css
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Styles shared by the cluster viewer index page and the per-cluster pages. */
table { border-collapse:collapse; border-spacing:0; }
body { font-family: times; font-size: 11pt; }
td { border: 1px solid gray; padding:2px 8px; }
th { border: 1px solid gray; padding:2px 8px; }
/* The color property takes a colour value only; the former "solid gray" was an
   invalid declaration, so the gray was never applied. */
.count { font-size:9pt; color: gray; }
.c { font-size:7pt; color: gray; }
.tdcount { text-align:right }
.info { font-size: 12pt; }
.suffixsort { text-align: right }
|
cluster-viewer/code/template.html
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<html>
|
2 |
+
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
3 |
+
<style>
|
4 |
+
STYLE
|
5 |
+
</style>
|
6 |
+
|
7 |
+
<h1>Word cluster viewer</h1>
|
8 |
+
|
9 |
+
<div class=info>
|
10 |
+
Word cluster viewer.
|
11 |
+
</div>
|
12 |
+
|
13 |
+
<p>
|
14 |
+
<table>
|
15 |
+
<tr>
|
16 |
+
<th>Cluster path (and word type count)
|
17 |
+
<th>Words (most frequent)
|
18 |
+
</tr>
|
19 |
+
TABLE
|
20 |
+
</table>
|
21 |
+
|
22 |
+
</html>
|
input.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
the cat chased the mouse
|
2 |
+
the dog chased the cat
|
3 |
+
the mouse chased the dog
|
output.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
0 the 6
|
2 |
+
10 chased 3
|
3 |
+
110 dog 2
|
4 |
+
1110 mouse 2
|
5 |
+
1111 cat 2
|