Stats and output files
This commit is contained in:
parent
f8e2893c13
commit
4c3696ee09
4 changed files with 443 additions and 0 deletions
|
|
@ -1,3 +1,9 @@
|
|||
###############################################################################
|
||||
# Please note that this is required only for scraping the data from groups using
|
||||
# python-fedora API. You do not have to fill it in if you are not going to
|
||||
# pull data of users of a particular FAS group.
|
||||
###############################################################################
|
||||
|
||||
[FAS]
|
||||
user: YourFASUser
|
||||
pass: YourFASPass
|
||||
|
|
|
|||
20
fedstats.py
Normal file
20
fedstats.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env python3
|
||||
# A class to get Fedora community data from FAS interfaces and Datagrepper
|
||||
# Alberto Rodriguez Sanchez bt0dotninja@fedoraproject.org
|
||||
# Renato Silva resilva87@fedoraproject.org
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
from fedora.client.fas2 import AccountSystem
|
||||
from fedora.client import AuthError
|
||||
from collections import Counter
|
||||
from datetime import datetime
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import json
|
||||
import requests
|
||||
import configparser
|
||||
278
output.py
Normal file
278
output.py
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
|
||||
from datetime import date, timedelta
|
||||
import fedmsg.meta
|
||||
import fedmsg
|
||||
import stats
|
||||
import pygal
|
||||
import math
|
||||
import json
|
||||
import csv
|
||||
import os
|
||||
|
||||
# Default global variables
# These module-level names are shared state read by the output functions in
# this file.
|
||||
# NOTE(review): subcategory_json / category_json are not read anywhere in the
# visible part of this file -- presumably set by a caller; confirm.
subcategory_json = None
|
||||
category_json = None
|
||||
# Prefix prepended to every output file name; generate_graph() sets it to
# "<stats.group>/" when stats.group is truthy.
path = ''
|
||||
# Base name of every output file, captured from stats.values['user'] at import
# time.
filename = stats.values['user']
|
||||
# Flags recording whether the start/end date header has already been written;
# save_csv() sets csv_init and save_text_metrics() sets text_init.
csv_init = text_init = False
|
||||
# Output format selector compared (lower-cased) in generate_graph().
mode = 'text'
|
||||
# Category currently being reported; assigned by generate_graph() from its
# ``category`` argument.
cat = None
|
||||
|
||||
|
||||
def draw_svg(graph_obj):
    """Render a drawable chart object to an SVG file.

    The file is written to ``<path><user>/<filename>_main.svg``.  When the
    module-level ``cat`` is set, its value is inserted directly before
    ``main`` in the file name.  The destination is printed before rendering.

    :param graph_obj: object exposing ``render_to_file(fname)``.
    """
    # An unset category contributes nothing to the file name.
    infix = "" if cat is None else cat
    fname = "%s%s/%s_%smain.svg" % (path, stats.values['user'],
                                    filename, infix)
    print("[*] Output saved to ", fname)
    graph_obj.render_to_file(fname)
|
||||
|
||||
|
||||
def draw_png(graph_obj):
    """Render a drawable chart object to a PNG file.

    The file is written to ``<path><user>/<filename>_main.png``.  When the
    module-level ``cat`` is set, its value is inserted directly before
    ``main`` in the file name.

    :param graph_obj: object exposing ``render_to_png(fname)``.
    """
    # An unset category contributes nothing to the file name.
    infix = "" if cat is None else cat
    fname = "%s%s/%s_%smain.png" % (path, stats.values['user'],
                                    filename, infix)
    graph_obj.render_to_png(fname)
|
||||
|
||||
|
||||
def draw_pie(output_json, title):
    """Build a pygal pie chart of percentage shares from a count mapping.

    :param output_json: mapping of label -> numeric count.
    :param title: chart title; converted with ``str()``.
    :returns: a ``pygal.Pie`` (500x500, inner radius 0.4) with one series
        per key, each holding that key's share of the total as a percentage
        rounded to two decimals.
    """
    pie_chart = pygal.Pie(inner_radius=0.4, width=500, height=500)
    pie_chart.title = str(title)
    # Sum once instead of once per key (the total is loop-invariant).
    total = float(sum(output_json.values()))
    for key, count in output_json.items():
        # A zero total would otherwise raise ZeroDivisionError; report 0%.
        percent = count / total * 100 if total else 0.0
        pie_chart.add(str(key), round(percent, 2))
    return pie_chart
|
||||
|
||||
|
||||
def draw_bar(output_json, title):
    """Build a pygal bar chart from a count mapping.

    :param output_json: mapping of label -> value; each entry becomes one
        series named ``str(label)``.
    :param title: chart title; converted with ``str()``.
    :returns: a 500x500 ``pygal.Bar`` object.
    """
    chart = pygal.Bar(width=500, height=500)
    chart.title = str(title)
    for label, value in output_json.items():
        chart.add(str(label), value)
    return chart
|
||||
|
||||
|
||||
def save_csv(output_json):
    """Append a CSV activity report for the current user.

    Rows are appended to ``<path><user>/<filename>_main.csv``.  The first
    call made while ``stats.start`` and ``stats.end`` are both set also
    writes a start/end date header; ``csv_init`` records that this has
    happened so the header is not repeated on later calls.

    :param output_json: mapping of category name -> activity count.
    """
    global csv_init
    user = stats.values['user']
    fname = "%s%s/%s_main.csv" % (path, user, filename)
    # Sum once; the total is the same for every row.
    total = sum(output_json.values())

    rows = []
    # The original tested ``text_init`` here, which this function never sets,
    # so the date header was emitted on every call.
    write_dates = bool(not csv_init and stats.end and stats.start)
    if write_dates:
        rows.extend([['Start Date : ', stats.start],
                     ['End Date : ', stats.end],
                     ['']])

    # Heading row followed by an empty row.
    rows.extend([['Username', 'Category', 'Activity Count', 'Percentage'],
                 []])
    for key, count in output_json.items():
        # Guard against an all-zero mapping (ZeroDivisionError otherwise).
        percent = round(count / float(total) * 100, 2) if total else 0.0
        label = key.capitalize()
        # Prefix sub-category rows with the category being reported.
        if cat is not None and cat.capitalize() != label:
            label = cat.capitalize() + "." + label
        rows.append([user, label, count, str(percent) + '%'])

    # Blank lines around the total.
    rows.extend([[''], ['', 'Total : ', total], ['']])

    # newline='' lets the csv module control line endings itself.
    with open(fname, 'a', newline='') as fout:
        csv.writer(fout).writerows(rows)
    if write_dates:
        # Only mark the header as written once the write has succeeded.
        csv_init = True
|
||||
|
||||
|
||||
def show_gource(unicode_json):
    """Write a gource custom-format log of the activities and launch gource.

    One line per message is written to
    ``<path><user>/<filename>_main.gource`` in the form
    ``timestamp|user|A|<subcategory> - <category>|<colour>`` and the file is
    then fed to ``gource`` on standard input.

    :param unicode_json: mapping with a ``raw_messages`` list; each message
        needs ``timestamp`` and a dotted ``topic`` with at least five parts.
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained.
    import subprocess

    # Thanks Ralph. Color codes taken from fedmsg2gource
    procs = [proc.__name__.lower() for proc in fedmsg.meta.processors]
    colors = ["FFFFFF", "008F37", "FF680A", "CC4E00",
              "8F0058", "8F7E00", "37008F", "7E008F"]
    # Cycle through the palette so every processor gets a colour.
    color_lookup = {name: colors[index % len(colors)]
                    for index, name in enumerate(procs)}

    default_user = stats.values['user']
    fname = "%s%s/%s_main.gource" % (path, default_user, filename)
    with open(fname, 'w', encoding='utf-8') as fout:
        for activity in unicode_json['raw_messages']:
            try:
                user = list(fedmsg.meta.msg2usernames(activity))[0]
            except IndexError:
                # No user names could be extracted from the message.
                user = default_user

            parts = activity['topic'].split('.')
            fout.write("%i|%s|A|%s|%s\n" % (
                activity['timestamp'],
                user,
                parts[4] + " - " + parts[3],
                # Fall back to the first palette colour for a category that
                # has no matching processor instead of raising KeyError.
                color_lookup.get(parts[3], colors[0]),
            ))

    # Argument list + stdin redirection replaces the former
    # ``cat file | gource ...`` shell string, so the user name can no longer
    # be interpreted by a shell.
    command = ["gource", "--log-format", "custom", "--highlight-user",
               str(default_user), "-c", "0.5", "-"]
    try:
        with open(fname, 'rb') as log:
            subprocess.call(command, stdin=log)
    except OSError as err:
        # os.system() merely reported a failure to start; keep that
        # non-fatal behaviour.
        print("[!] Could not run gource: %s" % err)
|
||||
|
||||
|
||||
def save_text_log(unicode_json):
    """Write a category-wise plain-text activity log for the current user.

    The report is written to ``<path><user>/<filename>_main.txt``
    (overwriting it).  For every category returned by
    ``stats.return_categories()`` the matching messages are listed by their
    fedmsg subtitle, followed by the entry count and the category's
    percentage of ``unicode_json['total']``.

    :param unicode_json: mapping with ``raw_messages`` (list of messages
        carrying a dotted ``topic``) and ``total``.
    """
    fname = "%s%s/%s_main.txt" % (path, stats.values['user'], filename)
    # Text is written as str through a UTF-8 file; the original concatenated
    # encoded bytes with str, which raises TypeError on Python 3.
    with open(fname, 'w', encoding='utf-8') as fout:
        # Category-wise Log
        fout.write("\n\n*** Category-wise activities ***\n\n")
        for category in stats.return_categories():
            actcount = 0
            for activity in unicode_json['raw_messages']:
                if category != activity['topic'].split('.')[3]:
                    continue
                actcount += 1
                # Print the category heading once, before its first entry.
                if actcount == 1:
                    fout.write("\n\n** Category : " +
                               category.capitalize() +
                               " **\n")
                try:
                    subtitle = fedmsg.meta.msg2subtitle(activity)
                except AttributeError:
                    # Best effort, as before: skip messages fedmsg cannot
                    # describe.
                    continue
                if subtitle is not None:
                    fout.write("* " + subtitle + "\n")

            total = float(unicode_json['total'])
            # Avoid ZeroDivisionError when the reported total is zero.
            percent = round(100 * actcount / total, 2) if total else 0.0
            fout.write("\nTotal Entries in category : " + str(actcount) +
                       "\n")
            fout.write("\nPercentage participation in category : " +
                       str(percent) + "\n")
|
||||
|
||||
|
||||
def save_text_metrics(output_json):
    """Append a tab-separated metrics table for the current user.

    The table is appended to ``<path><user>/<filename>_main.txt``.  The first
    call made while ``stats.start`` and ``stats.end`` are both set also
    writes a start/end date header; ``text_init`` records that so the header
    is not repeated.

    :param output_json: mapping of category name -> activity count.
    """
    global text_init
    user = stats.values['user']
    fname = path + user + '/' + filename + '_main.txt'
    print(fname)
    # Sum once; the total is the same for every row.
    total = sum(output_json.values())

    chunks = []
    write_dates = bool(not text_init and stats.end and stats.start)
    if write_dates:
        # The original passed a list of lists to file.write(), which raises
        # TypeError; emit the same information as text instead.
        chunks.append('Start Date : %s\nEnd Date : %s\n\n'
                      % (stats.start, stats.end))

    # Initial heading row
    chunks.append('Username\t\tCategory\t\tCount\t\tPercentage\n')
    for key, count in output_json.items():
        # Guard against an all-zero mapping (ZeroDivisionError otherwise).
        percent = round(count / float(total) * 100, 2) if total else 0.0
        label = key.capitalize()
        # Prefix sub-category rows with the category being reported.
        if cat is not None and cat.capitalize() != label:
            label = cat.capitalize() + "." + label
        chunks.append('%s\t\t%s\t\t%d\t\t%s\n'
                      % (user, label, count, str(percent) + '%'))

    # Insert blank lines and total
    chunks.append('\n\n Total : %d \n' % total)

    with open(fname, 'a') as fout:
        fout.write(''.join(chunks))
    if write_dates:
        # Only mark the header as written once the write has succeeded.
        text_init = True
|
||||
|
||||
|
||||
def save_markdown(unicode_json):
    """Write a category-wise activity log for the current user as Markdown.

    The report is written to ``<path><user>/<filename>_main.md``
    (overwriting it).  For every category returned by
    ``stats.return_categories()`` the matching messages are listed as
    bullets by their fedmsg subtitle, followed by the entry count and the
    category's percentage of ``unicode_json['total']``.

    :param unicode_json: mapping with ``raw_messages`` (list of messages
        carrying a dotted ``topic``) and ``total``.
    """
    fname = path + stats.values['user'] + '/' + filename + '_main.md'
    # Text is written as str through a UTF-8 file (unencodable characters are
    # dropped, matching the original ``errors='ignore'``).  The original
    # concatenated encoded bytes with str, which raises TypeError on
    # Python 3.
    with open(fname, 'w', encoding='utf-8', errors='ignore') as fout:
        # Category-wise Log, markdown ready
        fout.write("\n\n### Category-wise activities\n\n")
        for category in stats.return_categories():
            actcount = 0
            for activity in unicode_json['raw_messages']:
                if category != activity['topic'].split('.')[3]:
                    continue
                actcount += 1
                # Print the category heading once, before its first entry.
                if actcount == 1:
                    fout.write("\n\n#### Category : " +
                               category.capitalize() +
                               "\n")
                try:
                    subtitle = fedmsg.meta.msg2subtitle(activity)
                except AttributeError:
                    # Best effort, as before: skip messages fedmsg cannot
                    # describe.
                    continue
                if subtitle is not None:
                    fout.write("* " + subtitle + "\n")

            total = float(unicode_json['total'])
            # Avoid ZeroDivisionError when the reported total is zero.
            percent = round(100 * actcount / total, 2) if total else 0.0
            fout.write("\n* **Total Entries in category :** " +
                       str(actcount) + "\n")
            fout.write("\n* **Percentage participation in category :** " +
                       str(percent) + "\n")
|
||||
|
||||
|
||||
def save_json(unicode_json):
    """Dump ``unicode_json`` to ``<path><user>/<filename>_main.json``.

    An ``IOError`` while opening or writing the file is reported on stdout
    rather than raised.
    """
    target = ''.join((path, stats.values['user'], '/', filename,
                      '_main.json'))
    try:
        with open(target, 'w') as handle:
            json.dump(unicode_json, handle)
    except IOError:
        print("[!] Could not write into directory. Check Permissions")
|
||||
|
||||
|
||||
def generate_graph(output_json, title, category=None, gtype=None):
    """Create the output directory and emit the report in the selected mode.

    The module-level ``mode`` (case-insensitive) selects the output:
    ``svg`` / ``png`` render a chart, ``json``, ``text``, ``csv`` and
    ``markdown`` write a file, and ``gource`` launches gource.  Any other
    value prints an error.

    :param output_json: data handed to the selected writer.
    :param title: chart title (used by the svg/png modes only).
    :param category: stored in the module-level ``cat`` for the writers.
    :param gtype: ``'pie'`` or ``'bar'``; used by the svg/png modes only.
    """
    global path, cat

    user = stats.values['user']
    if stats.group:
        # Group reports are nested under "<group>/<user>".
        path = stats.group + '/'
        out_dir = path + user
    else:
        out_dir = user
    # exist_ok avoids the check-then-create race; makedirs also creates the
    # group directory when needed.
    os.makedirs(out_dir, exist_ok=True)

    cat = category
    print('[*] Readying Output..')
    selected = mode.lower()

    if selected in ('svg', 'png'):
        builders = {'pie': draw_pie, 'bar': draw_bar}
        if gtype not in builders:
            # Previously an unknown type passed None to the renderer and
            # failed with AttributeError.
            print("[!] Graph type %r is not supported! Use 'pie' or 'bar'."
                  % (gtype,))
            return
        graph_obj = builders[gtype](output_json, title)
        if selected == 'svg':
            draw_svg(graph_obj)
        else:
            draw_png(graph_obj)
    elif selected == 'json':
        save_json(output_json)
    elif selected == 'text':
        # stats.py declares the flag as ``logs``; honour ``log`` when a
        # caller sets it, and fall back to ``logs`` instead of raising
        # AttributeError.
        want_log = getattr(stats, 'log', getattr(stats, 'logs', False))
        if want_log:
            save_text_log(output_json)
        else:
            save_text_metrics(output_json)
    elif selected == 'csv':
        save_csv(output_json)
    elif selected == 'markdown':
        save_markdown(output_json)
    elif selected == 'gource':
        show_gource(output_json)
    else:
        print("[!] That output mode is not supported! Check README for help.")
|
||||
|
||||
139
stats.py
Normal file
139
stats.py
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
import fedmsg
|
||||
import fedmsg.meta
|
||||
import calendar
|
||||
import json
|
||||
import requests
|
||||
from collections import Counter
|
||||
|
||||
# This dictionary will be passed as param to requests later
# return_json() sends it as the query parameters of the GET request to
# ``baseurl``.
|
||||
values = dict()
|
||||
# User whose activity is fetched; return_json() drops this key from the first
# request when the value is 'all'.
values['user'] = None
|
||||
# 604800 == 7 * 24 * 3600 -- presumably a one-week window in seconds (confirm
# against the datagrepper API). return_json() deletes this key when both
# ``start`` and ``end`` are set.
values['delta'] = 604800
|
||||
values['rows_per_page'] = 100
|
||||
# NOTE(review): presumably excludes the 'meetbot' category from results;
# confirm against the datagrepper API.
values['not_category'] = 'meetbot'
|
||||
# Page to request; return_json() increments it while paginating and resets it
# to 1 afterwards.
values['page'] = 1
|
||||
values['size'] = 'small'
|
||||
# Category filter; return_json() applies it only when values['user'] is 'all'.
category = ''
|
||||
# Start of the date range; parsed by return_epoch() (month/day/year order).
start = ''
|
||||
# NOTE(review): not read in this module's visible code; output.generate_graph
# uses ``stats.group`` to nest the output directory.
group = ''
|
||||
# End of the date range; parsed by return_epoch() (month/day/year order).
end = ''
|
||||
# NOTE(review): not read in this module's visible code; output.generate_graph
# reads ``stats.log`` (without the trailing "s") -- confirm which is intended.
logs = False
|
||||
# NOTE(review): not read in the visible code -- confirm usage by callers.
weeks = 0
|
||||
baseurl = "https://apps.fedoraproject.org/datagrepper/raw"
|
||||
# Cache of the last datagrepper response; filled by return_json().
unicode_json = {}
|
||||
|
||||
|
||||
def return_epoch(time):
    """Convert a ``month/day/year`` date string to a UTC epoch timestamp.

    :param time: date such as ``'01/31/2020'``, or ``''``.
    :returns: ``''`` for an empty string, otherwise the integer number of
        seconds since the epoch for midnight UTC on that date.
    :raises ValueError: if a field is not an integer.
    :raises IndexError: if fewer than three ``/``-separated fields are given.
    """
    if time == '':
        return ''
    # Build a real list: on Python 3 ``map()`` returns an iterator that
    # cannot be subscripted.
    fields = [int(part) for part in time.split('/')]
    month, day, year = fields[0], fields[1], fields[2]
    return int(calendar.timegm((year, month, day, 0, 0, 0)))
|
||||
|
||||
def return_json():
    """Return the datagrepper messages for ``values['user']``, using a cache.

    The module-level ``unicode_json`` is reused when it already holds data
    for the requested user.  Otherwise the first page is requested from
    ``baseurl`` and the ``raw_messages`` of every further page are appended
    to it.

    Side effects on ``values``: ``category`` is set when the user is
    ``'all'`` and a category is configured; ``start``/``end`` replace
    ``delta`` when both dates are configured; ``page`` is reset to 1.

    :returns: the decoded JSON response with all pages' ``raw_messages``.
    """
    global unicode_json

    # Only pull the values from datagrepper if it's the first run or the
    # requested user changed.
    if unicode_json:
        cached_users = unicode_json['arguments']['users']
        # A request made without a user has no cached user name (presumably
        # an empty list -- confirm against the API); treat it as 'all'
        # instead of raising IndexError.
        cached_user = cached_users[0] if cached_users else 'all'
        if cached_user == values['user']:
            return unicode_json

    print('[*] Grabbing datagrepper values for user ' + values['user'] + '..')

    # If the user is set as all, we filter it using the provided category,
    # if any
    if category != '' and values['user'] == 'all':
        values['category'] = category
    if start != '' and end != '':
        values['start'] = return_epoch(start)
        values['end'] = return_epoch(end)
        # pop() instead of del: 'delta' is already gone on a second ranged
        # fetch and ``del`` would raise KeyError.
        values.pop('delta', None)

    # Work on a copy so paging never leaks into the shared dict, and so the
    # pseudo-user 'all' is omitted from every request, not just the first.
    params = dict(values)
    if params['user'] == 'all':
        del params['user']

    response = requests.get(baseurl, params=params)
    result = json.loads(response.text)
    total_pages = result['pages']
    print("Total pages found : " + str(total_pages))

    # The first page is already loaded; fetch pages 2..total_pages.  The
    # original loop ran ``total_pages`` more times and so requested one page
    # past the end.
    first_page = params['page']
    for page in range(first_page + 1, total_pages + 1):
        print(" [*] Loading Page " + str(page) + "/" + str(total_pages))
        params['page'] = page
        response = requests.get(baseurl, params=params)
        paginated_json = json.loads(response.text)
        # Pull data from multiple pages and append them to the main JSON
        result['raw_messages'].extend(paginated_json['raw_messages'])

    values['page'] = 1
    # Publish the cache only once every page has been merged.
    unicode_json = result
    return unicode_json
|
||||
|
||||
def return_categories():
    """Count the fetched messages per category.

    The category is the fourth dot-separated component of each message's
    ``topic``.

    :returns: a ``Counter`` mapping category -> number of messages.
    """
    payload = return_json()
    print("[*] Identifying Categories..")
    return Counter(message['topic'].split('.')[3]
                   for message in payload['raw_messages'])
|
||||
|
||||
def return_subcategories(category):
    """Count the cached messages of ``category`` per sub-category.

    Reads the module-level ``unicode_json``.  The category is the fourth
    dot-separated component of a message's ``topic`` and the sub-category is
    the fifth.

    :param category: category name to match exactly.
    :returns: a ``Counter`` mapping sub-category -> number of messages.
    """
    print("[*] Identifying sub-categories..")
    tally = Counter()
    for message in unicode_json['raw_messages']:
        parts = message['topic'].split('.')
        if parts[3] == category:
            tally[parts[4]] += 1
    return tally
|
||||
|
||||
def return_interactions(subcategories):
    """Count, per sub-category, the interactions found in cached messages.

    Reads the module-level ``unicode_json``.  The sub-category is the fifth
    dot-separated component of a message's ``topic`` and the interaction is
    the sixth.

    :param subcategories: iterable of sub-category names.
    :returns: dict mapping each sub-category to a ``Counter`` of its
        interactions, or ``{None: None}`` as soon as a topic is too short to
        be indexed.
    """
    # Start every sub-category with an empty list of interactions.
    collected = {subcat: [] for subcat in subcategories}

    # Gathering sub-sub-categories
    for message in unicode_json['raw_messages']:
        for subcat in subcategories:
            try:
                parts = message['topic'].split('.')
                if subcat == parts[4] and parts[5]:
                    collected[subcat].append(parts[5])
            except IndexError:
                print("[!] That category doesn't have any more interactions!")
                return {None: None}

    # Changing each list to a counter
    return {subcat: Counter(found) for subcat, found in collected.items()}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue