Skip to content

Commit 120abbe

Browse files
committed
add one billion row chalage solution in ruby (may not work)
1 parent 9573f22 commit 120abbe

File tree

3 files changed

+144
-1
lines changed

3 files changed

+144
-1
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
/.idea/
1+
/.idea/
2+
1brc/1brc

1brc/main.rb

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# frozen_string_literal: true
2+
3+
require 'open3'
4+
5+
DEFAULT_NWORKERS = 6
6+
PROGRAM_NAME = 'main.rb'
7+
8+
Measurement = Struct.new(:min, :max, :sum, :count)
9+
10+
# usage: `ruby main.rb <file> <threads>`
11+
# single worker: `ruby main.rb <file> <threads> worker <index>`
12+
13+
def main
14+
mode = ARGV[2]
15+
16+
if mode == 'worker'
17+
run_worker
18+
else
19+
results = calculate_in_processes
20+
Dir['worker_data_*.bin'].each { File.delete(_1) }
21+
merge(results)
22+
end
23+
end
24+
25+
def result_file_path(process_index)
26+
"worker_data_#{process_index}.bin"
27+
end
28+
29+
def calculate_in_processes
30+
file = ARGV[0]
31+
nworkers = ARGV[1]&.to_i || DEFAULT_NWORKERS
32+
threads = (0...nworkers).map do |i|
33+
Thread.new do
34+
_, stderr_str, status = Open3.capture3("ruby --yjit #{PROGRAM_NAME} #{file} #{nworkers} worker #{i}")
35+
unless status.success?
36+
puts stderr_str
37+
# TODO: kill running processes when one is not successful
38+
exit
39+
end
40+
result =
41+
File.open(result_file_path(i), 'rb') do |inp|
42+
Marshal.load(inp)
43+
end
44+
File.delete(result_file_path(i))
45+
result
46+
end
47+
end
48+
threads.map(&:value)
49+
end
50+
51+
def merge(results)
52+
result, *other = results
53+
54+
55+
result.merge!(*other) do |_, old_value, new_value|
56+
Measurement.new(
57+
[old_value.min, new_value.min].min,
58+
[old_value.max, new_value.max].max,
59+
old_value.sum + new_value.sum,
60+
old_value.count + new_value.count
61+
)
62+
end
63+
64+
sorted = result.sort.map do |name, stats|
65+
min = stats.min.round(1)
66+
mean = (stats.sum / stats.count + 0.0000001).round(1)
67+
max = stats.max.round(1)
68+
"#{name}=#{min}/#{mean}/#{max}"
69+
end
70+
71+
print '{'
72+
print sorted.join(', ')
73+
puts '}'
74+
end
75+
76+
def run_worker
77+
file = ARGV[0]
78+
nworkers = ARGV[1].to_i
79+
index = ARGV[3].to_i
80+
81+
chunk_size = File.size(file) / nworkers
82+
offset = index * chunk_size
83+
84+
result = calculate_data(file, offset, chunk_size, index)
85+
File.open(result_file_path(index), 'wb') do |out|
86+
Marshal.dump(result, out)
87+
end
88+
end
89+
90+
def calculate_data(file, offset, chunk_size, index)
91+
skip_line = index != 0
92+
buffer = IO::Buffer.map(File.open(file), nil, 0, IO::Buffer::READONLY)
93+
data = {}
94+
95+
name = nil
96+
line_start_offset = 0
97+
number_start_offset = 0
98+
99+
if skip_line
100+
buffer.each(:U8, offset) do |byte_offset, byte|
101+
next unless byte == 10
102+
103+
offset = offset + (byte_offset - offset) + 1
104+
line_start_offset = offset
105+
break
106+
end
107+
end
108+
109+
buffer.each(:U8, offset) do |byte_offset, byte|
110+
if byte == 59 # ';'
111+
name = buffer.get_string(line_start_offset, byte_offset - line_start_offset)
112+
number_start_offset = byte_offset + 1
113+
elsif byte == 10 # '\n'
114+
value = buffer.get_string(number_start_offset, byte_offset - number_start_offset).to_f
115+
measurement = data[name]
116+
measurement ||= data[name] = Measurement.new(value, value, 0, 0)
117+
measurement.min = value if value < measurement.min
118+
measurement.max = value if value > measurement.max
119+
measurement.count += 1
120+
measurement.sum += value
121+
122+
line_start_offset = byte_offset + 1
123+
124+
break if byte_offset >= offset + chunk_size
125+
end
126+
end
127+
128+
data
129+
end
130+
131+
main

1brc/test.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
require 'open3'
2+
3+
files = Dir['1brc/src/test/resources/samples/*.txt']
4+
files_max_name_size = files.map(&:size).max
5+
files.each do |input_file|
6+
print input_file.ljust(files_max_name_size + 3, '.')
7+
expected = File.read(input_file.sub('.txt', '.out'))
8+
stdout_str, stderr_str, status = Open3.capture3("ruby main.rb #{input_file} 1")
9+
status = stdout_str.strip == expected.strip ? 'ok' : 'not ok'
10+
puts status
11+
end

0 commit comments

Comments
 (0)