[cig-commits] r7935 - cs/stats/trunk

sue at geodynamics.org sue at geodynamics.org
Wed Sep 5 15:26:43 PDT 2007


Author: sue
Date: 2007-09-05 15:26:43 -0700 (Wed, 05 Sep 2007)
New Revision: 7935

Added:
   cs/stats/trunk/Makefile
   cs/stats/trunk/get_all.cxx
Removed:
   cs/stats/trunk/get_all.py
Modified:
   cs/stats/trunk/get_users
   cs/stats/trunk/master_control
Log:
Convert get_all.py to get_all.cxx.  Fix a bug in get_users

Added: cs/stats/trunk/Makefile
===================================================================
--- cs/stats/trunk/Makefile	2007-09-05 22:24:34 UTC (rev 7934)
+++ cs/stats/trunk/Makefile	2007-09-05 22:26:43 UTC (rev 7935)
@@ -0,0 +1,2 @@
+get_all: get_all.cxx
+	g++ -O3 $< -o $@ -lboost_filesystem

Added: cs/stats/trunk/get_all.cxx
===================================================================
--- cs/stats/trunk/get_all.cxx	2007-09-05 22:24:34 UTC (rev 7934)
+++ cs/stats/trunk/get_all.cxx	2007-09-05 22:26:43 UTC (rev 7935)
@@ -0,0 +1,221 @@
+#include <fstream>
+#include <map>
+#include <list>
+#include <string>
+#include <vector>
+#include <set>
+#include <iostream>
+#include <boost/filesystem/operations.hpp>
+#include <boost/filesystem/fstream.hpp>
+
+using namespace std;
+namespace fs=boost::filesystem;
+/* Scan the Apache and Zope access logs and, for every package file name
+   and month/year pair, collect the distinct first fields of the matching
+   log lines that survive the robot/local filter.  Each set is written to
+   a file named <file name>_<month>_<year> in the current directory. */
+int main()
+{
+  map<string,set<string> > hits;
+  vector<string> remove_regex, months, years;
+  /* Lines matching any of these are ignored.  A leading '^' anchors the
+     rest of the entry at the start of the line; others match anywhere. */
+  remove_regex.push_back("bot");
+  remove_regex.push_back("spider");
+  remove_regex.push_back("Yahoo! Slurp");
+  remove_regex.push_back("^131.215");
+  remove_regex.push_back("^127.0.0.1");
+  /* A line is counted under every "month/year" pair below it contains. */
+  months.push_back("Jan");
+  months.push_back("Feb");
+  months.push_back("Mar");
+  months.push_back("Apr");
+  months.push_back("May");
+  months.push_back("Jun");
+  months.push_back("Jul");
+  months.push_back("Aug");
+  months.push_back("Sep");
+  months.push_back("Oct");
+  months.push_back("Nov");
+  months.push_back("Dec");
+  years.push_back("2006");
+  years.push_back("2007");
+
+  /* Download file name -> package label.  NOTE(review): only the file names
+     (keys) are used below; the labels are never read -- confirm intent. */
+  map<string,string> packages;
+  packages["MAG-1.0.0.tar.gz"]="MAG-1.0.0";
+  packages["MAG-1.0.1.tar.gz"]="MAG-1.0.1";
+  packages["MAG-1.0.2.tar.gz"]="MAG-1.0.2";
+  packages["mineos-1.0.0.tar.gz"]="Mineos-1.0.0";
+    
+  packages["Gale-1_2_1.tar.gz"]="Gale-1.2.1";
+  packages["Gale-Linux32-1_2_1.tar.gz"]="Gale-Linux32-1.2.1";
+  packages["Gale-LinuxAMD64-1_2_1.tar.gz"]="Gale-LinuxAMD64-1.2.1";
+  packages["Gale-MacIntel-1_2_1.dmg"]="Gale-MacIntel-1.2.1";
+  packages["Gale-MacPPC-1_2_1.dmg"]="Gale-MacPPC-1.2.1";
+  packages["Gale-Win32-1_2_1.zip"]="Gale-Win32-1.2.1";
+  packages["Gale-1_2_0.tar.gz"]="Gale-1.2.0";
+  packages["Gale-Linux32-1_2_0.tar.gz"]="Gale-Linux32-1.2.0";
+  packages["Gale-LinuxAMD64-1_2_0.tar.gz"]="Gale-LinuxAMD64-1.2.0";
+  packages["Gale-MacIntel-1_2_0.dmg"]="Gale-MacIntel-1.2.0";
+  packages["Gale-MacPPC-1_2_0.dmg"]="Gale-MacPPC-1.2.0";
+  packages["Gale-Win32-1_2_0.zip"]="Gale-Win32-1.2.0";
+  packages["Gale-1_1_1.tar.gz"]="Gale-1.1.1";
+  packages["Gale-Linux32_1_1_1.tar.gz"]="Gale-Linux32-1.1.1";
+  packages["Gale-LinuxAMD64_1_1_1.tar.gz"]="Gale-LinuxAMD64-1.1.1";
+  packages["Gale-MacIntel-1_1_1.dmg"]="Gale-MacIntel-1.1.1";
+  packages["Gale-MacPPC-1_1_1.dmg"]="Gale-MacPPC-1.1.1";
+  packages["Gale-Win32_1_1_1.zip"]="Gale-Win32-1.1.1";
+  packages["Gale-1_1_0.tar.gz"]="Gale-1.1.0";
+  packages["Gale-Linux32_1_1_0.tar.gz"]="Gale-Linux32-1.1.0";
+  packages["Gale-LinuxAMD64_1_1_0.tar.gz"]="Gale-LinuxAMD64-1.1.0";
+  packages["Gale-MacIntel-1_1_0.dmg"]="Gale-MacIntel-1.1.0";
+  packages["Gale-MacPPC-1_1_0.dmg"]="Gale-MacPPC-1.1.0";
+  packages["Gale-Win32_1_1_0.zip"]="Gale-Win32-1.1.0";
+  packages["Gale-1.0.0.tar.gz"]="Gale-1.0.0";
+  packages["Gale-Linux-1.0.0.tar.gz"]="Gale-Linux-1.0.0";
+  packages["Gale-Mac-1_0_0.dmg"]="Gale-Mac-1.0.0";
+  packages["Gale-Win32-1_0_0.zip"]="Gale-Win32-1.0.0";
+  packages["Gale-0.9.0.tar.gz"]="Gale-0.9.0";
+  packages["Gale-0.2.0.tar.gz"]="Gale-0.2.0";
+  packages["Gale-0.1.0.tar.gz"]="Gale-0.1.0";
+  packages["Gale-Linux.tar.gz"]="Gale-Linux-0.9";
+  packages["Gale-Mac.dmg"]="Gale-Mac-0.9";
+  packages["Gale-Win32.zip"]="Gale-Win32-0.9";
+    
+  packages["plasti-1.0.0.tar.gz"]="Plasti-1.0.0";
+    
+  packages["lithomop3d-1.0.0.tar.gz"]="LithoMop3d-1.0.0";
+  packages["lithomop3d-0.7.2.tar.gz"]="LithoMop3d-0.7.2";
+    
+  packages["pylith-1.0.1.tar.gz"]="PyLith-1.0.1";
+  packages["pylith-1.0.1-darwin-powerpc.tar.gz"]="PyLith-Mac-PowerPC-1.0.1";
+  packages["pylith-1.0.1-win-i686.exe"]="PyLith-Win-1.0.1";
+  packages["pylith-1.0.1-linux-i686.tar.gz"]="PyLith-Linux-1.0.1";
+  packages["pylith-1.0.0.tar.gz"]="PyLith-1.0.0";
+  packages["pylith-1.0.0-darwin-powerpc.tar.gz"]="PyLith-Mac-PowerPC-1.0.0";
+  packages["pylith-1.0.0-win-i686.exe"]="PyLith-Win-1.0.0";
+  packages["pylith-1.0.0-linux-i686.tar.gz"]="PyLith-Linux-1.0.0";
+  packages["pylith3d-0.8.3.tar.gz"]="PyLith3d-0.8.3";
+  packages["pylith3d-0.8.3-linux-i686.tar.gz"]="PyLith-Linux-0.8.3";
+  packages["pylith3d-0.8.3-darwin-powerpc.tar.gz"]="PyLith-Mac-0.8.3";
+  packages["pylith3d-0.8.3-win-i686.exe"]="PyLith-Win32-0.8.3";
+  packages["pylith3d-0.8.2.tar.gz"]="PyLith3d-0.8.2";
+  packages["pylith3d-0.8.1.tar.gz"]="PyLith3d-0.8.1";
+  packages["pylith3d-0.8.0.tar.gz"]="PyLith3d-0.8.0";
+  packages["PyLith-0.8p1.dmg"]="PyLith-Mac-0.8p1";
+  packages["setup.exe"]="PyLith-Win32-0.8.0";
+  packages["pylith-0.8.1-linux-x86.tar.gz"]="PyLith-Linux-0.8.1";
+    
+  packages["Ellipsis3D-1.0.2.tar.gz"]="Ellipsis3D-1.0.2";
+  packages["ellipsis3D-1.0.1.tar.gz"]="Ellipsis3D-1.0.1";
+  packages["ellipsis3D-1.0.0.tar.gz"]="Ellipsis3D-1.0.0";
+    
+  packages["cigma-0.9.0.tar.gz"]="Cigma-0.9.0";
+
+  packages["CitcomS-2.2.2.tar.gz"]="CitcomS-2.2.2";
+  packages["CitcomS-2.2.1.tar.gz"]="CitcomS-2.2.1";
+  packages["CitcomS-2.1.0.tar.gz"]="CitcomS-2.1.0";
+  packages["CitcomS-2.0.2.tar.gz"]="CitcomS-2.0.2";
+  packages["CitcomS-2.0.1.tar.gz"]="CitcomS-2.0.1";
+  packages["CitcomS-2.0.0.tar.gz"]="CitcomS-2.0.0";
+    
+  packages["CitcomCU-1.0.0.tar.gz"]="CitcomCU-1.0.0";
+  packages["CitcomCU-1.0.1.tar.gz"]="CitcomCU-1.0.1";
+  packages["CitcomCU-1.0.2.tar.gz"]="CitcomCU-1.0.2";
+  packages["CitcomCU-1.0.2-br-inflow.tar.gz"]="CitcomCU-1.0.2-br-inflow";
+
+  list<fs::path> logs;
+  /* Add the Apache log */
+  logs.push_back("/var/log/apache2/access_log");
+
+  /* Add the Zope logs: every directory entry whose name starts with "Z2" */
+  for(fs::directory_iterator i("/var/lib/zope2.7/instance/plone/log/");
+      i!=fs::directory_iterator(); ++i)
+    {
+      if(i->leaf().substr(0,2)=="Z2")
+	logs.push_back(*i);
+    }
+
+  for(list<fs::path>::iterator log_path=logs.begin(); log_path!=logs.end();
+      ++log_path)
+    {
+      unsigned int size(log_path->leaf().size());
+      fs::path file_path(*log_path);
+      string temp_file("temp_file");
+      /* A gzipped log is copied here and run through gunzip; the result is
+	 expected in temp_file, which is read in place of the original. */
+      if(size>3 && log_path->leaf().substr(size-3)==".gz")
+	{
+	  string temp_file_gz("temp_file.gz");
+	  fs::remove(temp_file_gz);
+	  fs::remove(temp_file);
+	  fs::copy_file(*log_path,temp_file_gz);
+	  if(system(("gunzip " + temp_file_gz).c_str()))
+	    {
+	      cerr << "Can not uncompress temp file for "
+		   << log_path->string() << "\n";
+	      abort();
+	    }
+	  file_path=temp_file;
+	}
+      fs::ifstream log(file_path);
+      /* Read into a string so that a line of any length is read whole. */
+      string logline;
+      while(getline(log,logline))
+	{
+	  bool valid(true);
+	  for(vector<string>::iterator i=remove_regex.begin();
+	      i!=remove_regex.end(); ++i)
+	    {
+	      if((*i)[0]=='^')
+		{
+		  if(logline.size()>=i->size()-1
+		     && logline.substr(0,i->size()-1)==i->substr(1))
+		    {
+		      valid=false;
+		      break;
+		    }
+		}
+	      else if(logline.find(*i)!=string::npos)
+		{
+		  valid=false;
+		  break;
+		}
+	    }
+	  if(valid)
+	    for(vector<string>::iterator m=months.begin();
+		m!=months.end(); ++m)
+	      for(vector<string>::iterator y=years.begin();
+		  y!=years.end(); ++y)
+		if(logline.find(*m+"/"+*y)!=string::npos)
+		  {
+		    for(map<string,string>::iterator p=packages.begin();
+			p!=packages.end(); ++p)
+		      if(logline.find(p->first)!=string::npos)
+			{
+			  /* Record the whole first space-delimited field. */
+			  hits[p->first+"_"+*m+"_"+*y]
+			    .insert(logline.substr(0,logline.find(' ')));
+			}
+		  }
+	}
+      fs::remove(temp_file);
+    }
+
+  /* Write one file per month, year and package -- created even if empty. */
+  for(vector<string>::iterator m=months.begin(); m!=months.end(); ++m)
+    for(vector<string>::iterator y=years.begin(); y!=years.end(); ++y)
+      for(map<string,string>::iterator p=packages.begin();
+	  p!=packages.end(); ++p)
+	{
+	  string name(p->first+"_"+*m+"_"+*y);
+	  ofstream of(name.c_str());
+	  map<string,set<string> >::iterator i=hits.find(name);
+	  if(i!=hits.end())
+	    for(set<string>::iterator j=i->second.begin();
+		j!=i->second.end(); ++j)
+	      of << *j << "\n";
+	}
+}

Deleted: cs/stats/trunk/get_all.py
===================================================================
--- cs/stats/trunk/get_all.py	2007-09-05 22:24:34 UTC (rev 7934)
+++ cs/stats/trunk/get_all.py	2007-09-05 22:26:43 UTC (rev 7935)
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-
-import gzip
-import re
-import os
-import string
-from sets import Set
-
-months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov',
-        'Dec']
-years=['2006','2007']
-
-remove_regex=['bot','spider','Yahoo! Slurp','^131\.215','^127\.0\.0\.1']
-
-from packages import *
-
-# Initialize the hits for all of the packages and months
-hits={}
-for m in months:
-    for y in years:
-        for p in packages.itervalues():
-            hits[p+"_"+m+"_"+y]=Set()
-
-# Get all of the logs to scan
-logs=['/var/log/apache2/access_log']
-zope_logs=os.listdir("/var/lib/zope2.7/instance/plone/log")
-for log in zope_logs:
-    if re.search("Z2.log",log):
-        logs+=["/var/lib/zope2.7/instance/plone/log/"+log]
-
-# Scan through the logs, using gzip if needed
-for log in logs:
-    if log[-3:]==".gz":
-        f=gzip.GzipFile(log)
-    else:
-        f=open(log)
-    # Scan the log, line by line.
-    s=f.readline()
-    while(s):
-        # Remove robots, spiders, and local hits
-        valid=True
-        for pattern in remove_regex:
-            if re.search(pattern,s):
-                valid=False
-                continue
-        if valid:
-            for m in months:
-                for y in years:
-                    if re.search(m+"/"+y,s):
-                        for p in packages.iterkeys():
-                            if re.search(p,s):
-                                ttmp=packages[p]+"_"+m+"_"+y
-                                hits[ttmp].add(string.split(s)[0])
-        s=f.readline()
-    f.close()
-
-# Write the raw ip's
-for h in hits.iterkeys():
-    f=open(h,"w")
-    for ip in hits[h]:
-        f.write(ip)
-        f.write("\n")
-    f.close()
-    

Modified: cs/stats/trunk/get_users
===================================================================
--- cs/stats/trunk/get_users	2007-09-05 22:24:34 UTC (rev 7934)
+++ cs/stats/trunk/get_users	2007-09-05 22:26:43 UTC (rev 7935)
@@ -2,11 +2,12 @@
 rm -f institutions
 current_month=`date +%b`
 current_year=`date +%G`
-for p in `ls *_$current_month_$current_year`; do
+for p in `ls *_${current_month}_${current_year}`; do
     project=`echo $p | rev | cut -d _ -f 3- | rev`
     rm -f ${project}_users ${project}_institutions ${project}_total_users
     for year in `cat years`; do
     	for month in `cat months`; do
+		echo ${project}_${month}_${year}
 		wc -l ${project}_${month}_${year} | cut -f 1 -d " " >> ${project}_users
 		rm -f users
 		for f in `cat ${project}_${month}_${year}`; do

Modified: cs/stats/trunk/master_control
===================================================================
--- cs/stats/trunk/master_control	2007-09-05 22:24:34 UTC (rev 7934)
+++ cs/stats/trunk/master_control	2007-09-05 22:26:43 UTC (rev 7935)
@@ -1,8 +1,8 @@
-./get_all.py
+./get_all
 ./get_users
 ./make_plots
 mkdir -p /home/sue/public_html/stats/plots
-./rm_empty
+./rm_empty.py
 cp *_users /home/sue/public_html/stats
 cp *_institutions /home/sue/public_html/stats
 cp *.png /home/sue/public_html/stats/plots



More information about the cig-commits mailing list