#!/usr/local/bin/python
"""
Draw forum traffic over time as a "river" of stacked SVG ribbons.

Forum tree shape:
[(f1, [(f2, [])]), (f3, []) ...]

plan: pick time step, probably months. for each time slice, estimate
busyness of each forum, then draw them in the right tree order.

cool features: lan parties were big for a while, but then the forum
completely died a few times in later years

see also:
http://infoviz.pnl.gov/pdf/themeriver99.pdf
  (description only, no code, for the NYT boxoffice thing)
http://www.nytimes.com/interactive/2008/02/23/movies/20080223_REVENUE_GRAPHIC.html
  is the NYT boxoffice thing
http://infosthetics.com/archives/2005/08/email_mountain.html
  related viz
http://infosthetics.com/archives/2005/03/history_flow.html
  related viz, wikipedia authors over time
"""
from __future__ import division

import random
import pprint
import colorsys
import logging
import cPickle as pickle

from nevow import tags as T, flat
from rdflib.Graph import Graph
from rdflib import RDF, Literal
from ns import DC, SIOC, BB
from pymunk.vec2d import Vec2d

log = logging.getLogger()
logging.basicConfig(level=logging.DEBUG)

# Where the (forward, roots, stats) tuple from collectData() is cached.
statsCachePath = "/tmp/timing.p"

# Set to True to re-read the .nt files and rewrite the cache; otherwise
# the cache at statsCachePath is loaded.
rebuildStatsCache = False


def forumStartTime(forum):
    """fake start time based on id. We could get real dates from the posts

    Returns the integer that follows the first '=' in the forum URI.
    """
    return int(str(forum).split('=')[1])


def collectData(g):
    """Read the forum tree from graph `g` and post counts from forumCounts.nt.

    Returns (forward, roots, stats):
      forward -- {parent forum: [child forums]}
      roots   -- [forums that have no SIOC has_parent]
      stats   -- {(forum, yyyymm): number of posts}
    """
    log.info("collect data")
    forward = {}  # forum : [children]. If a forum is in here, it's not a leaf
    roots = []    # forums with no parents
    for forum in sorted(g.subjects(RDF.type, SIOC['Forum'])):
        # forums whose id number is above 200 are left out
        if forumStartTime(forum) > 200:
            continue
        par = g.value(forum, SIOC['has_parent'])
        if not par:
            roots.append(forum)
        else:
            forward.setdefault(par, []).append(forum)

    log.info("read counts")
    g2 = Graph()
    g2.parse("forumCounts.nt", format="nt")
    stats = {}  # (forum, yyyymm) : numposts
    for forum, _, st in g2.triples((None, BB['traffic'], None)):
        yyyymm = g2.value(st, BB['month'])
        count = int(g2.value(st, BB['numPosts']))
        stats[forum, yyyymm] = count
    return forward, roots, stats


def _dumpPickle(obj, path):
    """Pickle `obj` to `path` and close the file even if dumping fails."""
    # protocol=-1 selects a binary pickle format, so the file must be
    # opened in binary mode.
    f = open(path, "wb")
    try:
        pickle.dump(obj, f, protocol=-1)
    finally:
        f.close()


def _loadPickle(path):
    """Return the object pickled at `path`, closing the file afterwards."""
    # NOTE(review): unpickling executes whatever the file describes, and
    # the cache lives in a shared temp directory -- only run this where
    # the cache file is trusted.
    f = open(path, "rb")
    try:
        return pickle.load(f)
    finally:
        f.close()


def _writeFile(path, data):
    """Write the string `data` to `path`, closing the file afterwards."""
    f = open(path, "w")
    try:
        f.write(data)
    finally:
        f.close()


g = Graph()
log.info("read forums")
g.parse("forums.nt", format="nt")

if rebuildStatsCache:
    forward, roots, stats = collectData(g)
    _dumpPickle((forward, roots, stats), statsCachePath)
else:
    log.info("load pickle")
    forward, roots, stats = _loadPickle(statsCachePath)


def visitForums(func, _forums=roots, levelBreakFunc=None):
    """func(forum) called in depth-first order

    after each set of siblings, we also call levelBreakFunc(parent)

    `_forums` is the list to start from; it defaults to the module-level
    `roots` as bound when this function was defined.
    """
    for f in _forums:
        func(f)
        # levelBreakFunc is passed down so it fires at every depth, not
        # only for the root forums.
        visitForums(func, _forums=forward.get(f, []),
                    levelBreakFunc=levelBreakFunc)
        if levelBreakFunc:
            levelBreakFunc(f)

#pprint.pprint(forward)


def allYyyymm():
    """Yield 'YYYY-MM' strings from 1998-01 through 2008-01, in order."""
    for year in range(1998, 2008+1):
        for month in range(1, 12+1):
            if year == 2008 and month >= 2:
                # month 2 is there, but data is partial
                continue
            yield "%04d-%02d" % (year, month)


shadowOffset = 5
widthPerMonth = 40
heightPerPost = .03
marginPx = 30  # svg pixels left empty above and below the river

# Forum that gets a hard-coded fill color in Ribbon.baseColor and is
# collected into the `afterhours` debugging list in river2.
afterhoursForumUri = "http://boards.ie/vbulletin/forumdisplay.php?f=7"


def xform(pos):
    """Convert a (column, posts) position into svg pixel (x, y)."""
    return (pos[0] * widthPerMonth, pos[1] * heightPerPost + marginPx)


def hexFromHSV(h, s, v):
    """Return '#rrggbb' for the given hue/saturation/value (each 0..1).

    Channels are scaled by 256 and truncated; a channel of exactly 1.0
    would come out as 256 and print as three hex digits, so the result
    is clamped to 255.
    """
    return "#%02x%02x%02x" % tuple(
        min(255, int(x * 256)) for x in colorsys.hsv_to_rgb(h, s, v))


class Ribbon(object):
    """
    if a forum has conversation for multiple months in a row, that
    makes one ribbon
    """

    # hue in degrees, keyed by the DC title of the ribbon's parent forum
    parentHues = {
        'Arts': 220,  # first big stripe
        'Soc': 200,  # second big stripe
        'Edu': 180,
        'Rec': 159,  # green
        'Region': 108,
        'Cable & Digital TV': 32,
        'Games': 86,
        'Sports': 30,
        'Music': 10,
        'Tech': 20,
        'Sys': 58,  # bottom stripe
    }

    def __init__(self, forum):
        self.forum = forum
        self.counts = []  # (month, count, (x,y)) (in col/post coords)
        self.pads = {}  # yyyymm: pad height (in count coords)
        self.handleFrac = .4  # 0 is pointy

    def addCount(self, yyyymm, count, pos):
        """
        pos is top/left corner of this piece of the ribbon, in
        cols-posts coordinates
        """
        self.counts.append((yyyymm, count, pos))

    def __str__(self):
        return "ribbon %s, %s pts" % (self.forum, len(self.counts))

    __repr__ = __str__

    def parentHue(self):
        """Hue (0..1) looked up from the parent forum's title.

        Returns 0 when the forum has no parent. Raises KeyError when the
        parent's title is not in `parentHues`.
        """
        parent = g.value(self.forum, SIOC['has_parent'])
        if parent is None:
            return 0
        return self.parentHues[g.value(parent, DC['title'])] / 360

    def baseHue(self):
        """Hue shared by all of this ribbon's colors."""
        return self.parentHue()

    def nameHash(self):
        """Value in [0, 1) in steps of .001, derived from hash(self.forum)."""
        return (hash(self.forum) % 1000) * .001

    def baseColor(self):
        """Fill color: the parent hue, with saturation varied per forum."""
        if str(self.forum) == afterhoursForumUri:
            return hexFromHSV(159/360, 1, .6)
        sat = .2 + .8 * self.nameHash()
        return hexFromHSV(self.baseHue(), sat, .75)

    def strokeColor(self):
        """Outline color for the ribbon shape."""
        return hexFromHSV(self.baseHue(), .4, .9)

    def labelColor(self):
        """Fill color for the label text."""
        return hexFromHSV(self.baseHue(), .6, .3)

    def labelStrokeColor(self):
        """Color of the blurred border drawn behind the label text."""
        return hexFromHSV(self.baseHue(), .2, .9)

    def planPoints(self):
        """Fill self.cols with one (x, top, bottom) per entry of self.counts.

        Must be called before handles/render/renderBoxes/botPath/
        renderLabel, which all read self.cols.
        """
        self.cols = []  # (x,top,bottom), svg pixels
        for yyyymm, count, pos in self.counts:
            x, top = xform(pos)
            self.cols.append((x, top, top + count * heightPerPost))

    def handles(self, i, topBot=0):
        """return the handles to the left and right of the given
        self.cols index. topBot = 0 for tops; 1 for bottoms

        The index is clamped into range. The handles lie along the
        direction from the previous column's point to the next one's,
        each widthPerMonth * handleFrac away from the point.
        """
        def clamp(index):
            return max(0, min(len(self.cols) - 1, index))

        i = clamp(i)
        iLeft = clamp(i - 1)
        iRight = clamp(i + 1)

        pt = Vec2d(self.cols[i][0], self.cols[i][1+topBot])
        left = Vec2d(self.cols[iLeft][0], self.cols[iLeft][1+topBot])
        right = Vec2d(self.cols[iRight][0], self.cols[iRight][1+topBot])
        # at either end there is no neighbor, so use a point one month
        # further out at the same height
        if iRight == i:
            right.x += widthPerMonth
        if iLeft == i:
            left.x -= widthPerMonth

        tangent = (right - left).normalized() * widthPerMonth * self.handleFrac
        return pt - tangent, pt + tangent

    def render(self, isShadow=False):
        """
        svg path for this ribbon: curves along the tops left to right,
        a straight line down the right edge, curves back along the
        bottoms, then closed. With isShadow the style is plain black.
        """
        indexed = list(enumerate(self.cols))
        steps = ["M %s %s" % (self.cols[0][0], self.cols[0][1])]
        for colNum, (x, t, b) in indexed[1:]:
            coords = (
                tuple(self.handles(colNum - 1, topBot=0)[1]) +
                tuple(self.handles(colNum, topBot=0)[0]) +
                (x, t))
            steps.append("C %s,%s %s,%s %s,%s" % coords)
        steps.append("L %s %s" % (self.cols[-1][0], self.cols[-1][2]))
        for colNum, (x, t, b) in indexed[-2::-1]:
            coords = (
                tuple(self.handles(colNum + 1, topBot=1)[0]) +
                tuple(self.handles(colNum, topBot=1)[1]) +
                (x, b))
            steps.append("C %s,%s %s,%s %s,%s" % coords)
        steps.append("z")

        style = "fill:%s; stroke:%s" % (self.baseColor(), self.strokeColor())
        if isShadow:
            style = "fill: black"
        return T.Tag('path')(d=" ".join(steps), style=style)

    def renderBoxes(self, isShadow=False):
        """One svg rect per column, instead of a smooth path.

        `isShadow` is accepted but not used.
        """
        style = "fill:%s; stroke:%s" % (self.baseColor(), self.strokeColor())
        boxes = []
        for x, t, b in self.cols:
            boxes.append(T.Tag('rect')(
                x=x, y=t,
                width=widthPerMonth-2, height=b - t-2,
                style=style))
        return boxes

    def botPath(self, i, j):
        """path data for a path that runs along the bottom from column i to j"""
        steps = ["M %s %s" % (self.cols[i][0], self.cols[i][2])]
        # start values for the tail, for when the loop below is empty
        x, _, b = self.cols[i]
        for colNum, (x, t, b) in list(enumerate(self.cols))[i+1:j]:
            coords = (
                tuple(self.handles(colNum - 1, topBot=1)[1]) +
                tuple(self.handles(colNum, topBot=1)[0]) +
                (x, b))
            steps.append("C %s,%s %s,%s %s,%s" % coords)
        # a tail, in case the line was too short for text
        steps.append("L %s %s" % (x + 150, b))
        return " ".join(steps)

    def renderLabel(self, showPath=False):
        """Return svg objects that draw this forum's title along the ribbon.

        One label starts at the tallest column; another is added
        wherever more than 10 columns have passed without one and the
        ribbon is over 30px tall. With showPath the text guide paths
        are drawn instead of being hidden in <defs>.
        """
        indexed = list(enumerate(self.cols))
        # prefer columns away from the ends; short ribbons use them all
        candidates = indexed[1:-2] or indexed
        tallest = max((bottom - top, x, top, col)
                      for col, (x, top, bottom) in candidates)

        label = g.value(self.forum, DC['title'], default='unknown')

        labelStartColumns = set([tallest[3]])
        monthsWithoutLabel = 0
        minHeightForExtraLabel = 30
        for col, (x, top, bottom) in indexed[:-2]:
            height = bottom - top
            if monthsWithoutLabel > 10 and height > minHeightForExtraLabel:
                labelStartColumns.add(col)
            if col in labelStartColumns:
                monthsWithoutLabel = 0
            else:
                monthsWithoutLabel += 1

        objs = []
        # sorted so the element order in the output is repeatable
        for startColumn in sorted(labelStartColumns):
            startColumn = max(0, min(startColumn, len(self.cols) - 3))
            pathId = "txt%s-%s" % (id(self), startColumn)
            p = T.Tag('path')(id=pathId, stroke='black', fill='none',
                              d=self.botPath(startColumn, startColumn+5))
            if showPath:
                objs.append(T.Tag('g')[p])
            else:
                objs.append(T.Tag('defs')[p])
            tid = "%s-txt" % pathId
            # NOTE(review): T.raw emits the title unescaped -- confirm
            # titles in forums.nt are already safe to embed in XML.
            objs.append(T.Tag('text')(id=tid, **{'font-size': '60%',})[
                T.Tag('textPath')(**{'xlink:href':('#%s' % pathId)})[
                    T.Tag('tspan')(dy="-3")[T.raw(label)]]])
            if 0:
                # neither svg renderer does well with this shadow
                objs.append(T.Tag('use')(**{
                    'xlink:href': "#%s" % tid,
                    'stroke-width' : 4,
                    'stroke' : 'black',
                    'transform': "translate(0,%s)" % shadowOffset,
                    'filter': "url(#shadow)",
                    }))
            objs.append(T.Tag('use')(**{'xlink:href': "#%s" % tid,
                                        'stroke' : self.labelStrokeColor(),
                                        'stroke-width' : '4',
                                        'filter' : 'url(#textBorder)',
                                        'opacity' : .5,
                                        }))
            objs.append(T.Tag('use')(**{'xlink:href': "#%s" % tid,
                                        'fill' : self.labelColor(),
                                        }))
        return objs


def clothSim(ribbons):
    """Relax the ribbon layout with the spring simulation from `relax`.

    Each entry of every ribbon's `counts` becomes a Box. Springs join
    boxes that are 1 and 2 months apart in the same ribbon, and boxes
    that are adjacent within the same column. A pygame window shows
    the simulation, which runs until the window is closed or Escape is
    pressed; then each box's y is written back into its ribbon's
    `counts`.
    """
    from relax import World, Box, Anchor, Spring, pygameDrawWorld
    import pygame
    pygame.init()
    screen = pygame.display.set_mode((900, 600))
    running = True

    world = World()
    world.vertGap = 0

    boxCol = {}  # x : [(y, Box)]
    for r in ribbons:
        ribbonBoxes = []
        for i, (month, count, (x, y)) in enumerate(r.counts):
            # presumably Box(x, y, width, height) -- confirm against relax
            b = Box(x, y, 1, count)
            b.ribbon = r
            b.countIndex = i
            ribbonBoxes.append(b)
            boxCol.setdefault(x, []).append((y, b))
            world.boxes.append(b)

        for b1, b2 in zip(ribbonBoxes[:-1], ribbonBoxes[1:]):
            world.horizSprings.append(Spring(b1, b2))

        # encourage more flatness by running longer springs too
        for b1, b2 in zip(ribbonBoxes[:-2], ribbonBoxes[2:]):
            world.horizSprings.append(Spring(b1, b2, rest=2.0))

    for stack in boxCol.values():
        stack.sort()
        for (_, b1), (_, b2) in zip(stack[:-1], stack[1:]):
            world.vertSprings.append(Spring(b1, b2))

    world.measure()

    while running:
        # 75 simulation steps per drawn frame
        for loop in range(75):
            world.step(.0002)

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
            elif (event.type == pygame.KEYDOWN and
                  event.key == pygame.K_ESCAPE):
                running = False

        screen.fill((255, 255, 255))
        pygameDrawWorld(screen, world, withSprings=False)
        pygame.display.flip()

    # copy the relaxed y positions back; x, month and count are kept
    for b in world.boxes:
        month, count, (x, y) = b.ribbon.counts[b.countIndex]
        b.ribbon.counts[b.countIndex] = (month, count, (x, b.y))


def river2(shadows=True, sim=True, labels=True):
    """version 4: svg ribbons

    Returns the <svg> tag for the whole diagram.

    shadows -- draw a blurred black copy under every ribbon
    sim     -- run clothSim (opens a pygame window) to relax the layout
    labels  -- draw forum titles along the ribbons

    todo:
    put solid colors of the bigger categories -behind- the ribbons
    shift each column: center it?
    try to encourage new ribbons to be flat for a while, so we can get
      their labels in?
    try to minimize local wiggling, but let the whole river wander?
    render this dynamically, let the user pick subforums or something?
    zoom into a particular month so far that we can start seeing
      thread subject lines?
    zebra-stripe the months
    check left edge of path. Are we losing a month?
    """
    if sim:
        gap = 50  # pixels between ribbons (but then we relax these away)
    else:
        gap = 2

    ribbons = []
    prevMonthRibbons = {}  # forum : Ribbon
    colHeight = {}  # yyyymm : height
    adds = []  # args to ribbon add calls (buffering while I get the heights)
    afterhours = []  # only used by the testing lines commented out below

    for x, yyyymm in enumerate(allYyyymm()):
        # for testing a single time range
        # if not 60 <= x <= 85: continue
        # x -= 90

        thisMonthRibbons = {}
        y = [0]  # a list, so addIfActive can update the running height

        def addIfActive(forum):
            count = stats.get((forum, yyyymm), 0)
            if count:
                if forum in prevMonthRibbons:
                    # also active last month: extend that ribbon
                    r = prevMonthRibbons[forum]
                else:
                    r = Ribbon(forum)
                    ribbons.append(r)
                    if str(forum) == afterhoursForumUri:
                        afterhours.append(r)
                thisMonthRibbons[forum] = r
                adds.append((r, yyyymm, count, (x, y[0])))
                y[0] = y[0] + count + gap/heightPerPost

        visitForums(addIfActive)
        prevMonthRibbons = thisMonthRibbons
        colHeight[yyyymm] = y[0]

    # for testing just one ribbon
    # ribbons = afterhours
    # ribbons = ribbons[:15]

    maxHeight = max(colHeight.values())

    for r, yyyymm, count, (x, top) in adds:
        # center every column against the tallest one
        colYOffset = (maxHeight - colHeight[yyyymm]) / 2
        r.addCount(yyyymm, count, (x, top + colYOffset))

    if sim:
        clothSim(ribbons)

    for r in ribbons:
        r.planPoints()

    maxHeightPx = maxHeight * heightPerPost + marginPx * 2

    ribbonRenders = []
    for r in ribbons[::-1]:
        if shadows:
            ribbonRenders.append(T.Tag('g')(
                transform="translate(0,%s)" % shadowOffset,
                filter="url(#shadow)")[r.render(True)])
        ribbonRenders.append(r.render())

    labelRenders = []
    if labels:
        for r in ribbons:
            labelRenders.append(r.renderLabel())

    maxWidth = widthPerMonth * len(list(allYyyymm())) + 600
    return T.Tag('svg')(xmlns='http://www.w3.org/2000/svg',
                        width=maxWidth, height=maxHeightPx,
                        **{'xmlns:xlink':"http://www.w3.org/1999/xlink"})[
        T.Tag('defs')[
            T.Tag('filter')(id='shadow')[
                T.Tag('feGaussianBlur')(stdDeviation=6),
            ],
            T.Tag('filter')(id='textBorder')[
                T.Tag('feGaussianBlur')(stdDeviation=.75),
            ]
        ],
        T.Tag('rect')(x=0, y=0, width=maxWidth, height=maxHeightPx,
                      fill="#262626"),
        T.Tag('g')[ribbonRenders],
        T.Tag('g')[labelRenders],
    ]


log.info("create page")
divs = river2()
_writeFile("diagram.svg", flat.flatten(divs))
page = T.html(xmlns="http://www.w3.org/1999/xhtml")[T.body[
    T.style(type="text/css")[
        ' div { position: relative; font-size: 65%; white-space: nowrap;'
        ' border: 1px solid gray; } table td { vertical-align: top; } '],
    divs]]
log.info("write")
_writeFile("timing.html", flat.flatten(page))